Example #1
0
        /// <summary>
        /// Aligns four hard-coded DNA sequences with a <c>NucmerPairwiseAligner</c> and checks
        /// that <c>ToString()</c> of the first aligned sequence of the first alignment
        /// matches the expected text.
        /// </summary>
        public void TestAlignedSequenceToString()
        {
            IList<ISequence> seqList = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "CAAAAGGGATTGC---TGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGTAGTACAAAGGAGC"),
                new Sequence(Alphabets.DNA, "CAAAAGGGATTGC---"),
                new Sequence(Alphabets.DNA, "TAGTAGTTCTGCTATATACATTTG"),
                new Sequence(Alphabets.DNA, "GTTATCATGCGAACAATTCAACAGACACTGTAGA")
            };

            // Aligner configuration; properties are assigned in the same order as before.
            NucmerPairwiseAligner num = new NucmerPairwiseAligner
            {
                BreakLength       = 8,
                FixedSeparation   = 0,
                MinimumScore      = 0,
                MaximumSeparation = 0,
                SeparationFactor  = 0,
                LengthOfMUM       = 8
            };

            IList<ISequenceAlignment> alignmentObj = num.Align(seqList);
            AlignedSequence           alignedSeqs  = (AlignedSequence)alignmentObj[0].AlignedSequences[0];

            string actualString   = alignedSeqs.ToString();
            string expectedString = "CAAAAGGGATTGC---\r\nCAAAAGGGATTGC---\r\nCAAAAGGGATTGC---\r\n";

            // Expected value goes first so that a failure message labels the two values correctly.
            Assert.AreEqual(expectedString, actualString);
        }
Example #2
0
        /// <summary>
        /// Aligns four DNA sequences (the first one read from the XML test configuration) with a
        /// <c>NucmerPairwiseAligner</c> and checks that <c>ToString()</c> of the first aligned
        /// sequence matches the expected text, which is also read from the XML configuration.
        /// </summary>
        public void ValidateAlignedSequenceToString()
        {
            // Despite its name, this value is used as the first INPUT sequence for the aligner.
            string actualAlignedSeqString = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                            Constants.AlignedSeqActualNode);

            IList<ISequence> seqList = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, actualAlignedSeqString),
                new Sequence(Alphabets.DNA, "CAAAAGGGATTGC---"),
                new Sequence(Alphabets.DNA, "TAGTAGTTCTGCTATATACATTTG"),
                new Sequence(Alphabets.DNA, "GTTATCATGCGAACAATTCAACAGACACTGTAGA")
            };

            // Aligner configuration; properties are assigned in the same order as before.
            NucmerPairwiseAligner num = new NucmerPairwiseAligner
            {
                BreakLength       = 8,
                FixedSeparation   = 0,
                MinimumScore      = 0,
                MaximumSeparation = 0,
                SeparationFactor  = 0,
                LengthOfMUM       = 8
            };

            IList<ISequenceAlignment> alignmentObj = num.Align(seqList);
            AlignedSequence           alignedSeqs  = (AlignedSequence)alignmentObj[0].AlignedSequences[0];

            string actualString   = alignedSeqs.ToString();
            string expectedString = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
                                                                    Constants.AlignedSeqExpectedNode);

            // The actual output has real CR/LF pairs stripped; the expected text has the literal
            // four-character sequence backslash-r-backslash-n stripped. Expected value goes first
            // so that a failure message labels the two values correctly.
            Assert.AreEqual(expectedString.Replace("\\r\\n", ""), actualString.Replace("\r\n", ""));
        }
Example #3
0
        /// <summary>
        /// Runs Stage 1, 2, and 3 (see the class description) over the given sequences and
        /// wraps the outcome in a single-element list of sequence alignments.
        /// </summary>
        /// <param name="inputSequences">Sequences to align; at least one is required.</param>
        /// <returns>
        /// A list with one alignment whose <c>Sequences</c> holds the inputs and whose single
        /// aligned-sequence entry holds every item of <c>AlignedSequences</c>.
        /// </returns>
        /// <exception cref="ArgumentException">The input enumerates to zero sequences.</exception>
        public IList<Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
        {
            // Clear any results left over from an earlier run on this instance.
            _alignedSequences = _alignedSequencesA = _alignedSequencesB = _alignedSequencesC = null;
            _alignmentScore = _alignmentScoreA = _alignmentScoreB = _alignmentScoreC = float.MinValue;

            // Materialize the input once so it can be counted and re-enumerated.
            List<ISequence> sequences = inputSequences.ToList();
            if (sequences.Count < 1)
            {
                throw new ArgumentException("Empty input sequences");
            }

            // A zero gap cost is treated as "not assigned" and replaced by the default.
            if (GapOpenCost == 0)
            {
                GapOpenCost = -4;
            }

            if (GapExtensionCost == 0)
            {
                GapExtensionCost = -1;
            }

            Performance.Start();

            // Alphabet setup must happen before the alignment work starts.
            SetAlphabet(sequences, SimilarityMatrix, true);
            MsaUtils.SetProfileItemSets(_alphabet);

            Performance.Snapshot("Start Aligning");

            DoAlignment(sequences);

            // Package the result in the same shape the pairwise aligners return.
            var wrapper = new Alignment.SequenceAlignment();
            IAlignedSequence combined = new AlignedSequence();

            foreach (var aligned in AlignedSequences)
            {
                combined.Sequences.Add(aligned);
            }

            foreach (var original in sequences)
            {
                wrapper.Sequences.Add(original);
            }

            wrapper.AlignedSequences.Add(combined);

            var results = new List<Alignment.ISequenceAlignment>();
            results.Add(wrapper);
            return results;
        }
Example #4
0
        /// <summary>
        ///     Builds a fresh aligned sequence from the output of the given aligner and checks
        ///     that its sequences and metadata match the aligner's first aligned sequence.
        /// </summary>
        /// <param name="nodeName">XML node supplying the alphabet name and the two input sequences.</param>
        /// <param name="aligner">Aligner under test (sw/nw/pw).</param>
        private void ValidateAlignedSequenceCtor(string nodeName, ISequenceAligner aligner)
        {
            // Read the test inputs from the XML configuration.
            string    alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
            IAlphabet alphabet     = Utility.GetAlphabet(alphabetName);
            string    firstText    = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string    secondText   = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

            var inputSequences = new List<ISequence>
            {
                new Sequence(alphabet, firstText),
                new Sequence(alphabet, secondText)
            };

            // Run the aligner and pick the first aligned sequence of the first alignment.
            IAlignedSequence          copy      = new AlignedSequence();
            IList<ISequenceAlignment> alignment = aligner.Align(inputSequences);
            var source = alignment[0].AlignedSequences[0];

            // Copy sequences and metadata into the freshly constructed instance.
            foreach (var sequence in source.Sequences)
            {
                copy.Sequences.Add(sequence);
            }

            foreach (string key in source.Metadata.Keys)
            {
                copy.Metadata.Add(key, source.Metadata[key]);
            }

            // Every copied sequence must render the same text as its source.
            int total = source.Sequences.Count;
            for (int i = 0; i < total; i++)
            {
                Assert.AreEqual(source.Sequences[i].ToString(), copy.Sequences[i].ToString());
            }

            // Every metadata entry must have been carried over unchanged.
            foreach (string key in source.Metadata.Keys)
            {
                Assert.AreEqual(source.Metadata[key], copy.Metadata[key]);
            }

            ApplicationLog.WriteLine(@"Alignment BVT : Validation of aligned sequence ctor completed successfully");
        }
        /// <summary>
        /// Parses a single biological sequence alignment text from a stream.
        /// The current line must begin with "CLUSTAL" (case-insensitive); the lines that follow
        /// are read as blocks of "id data [number]" rows separated by blank or consensus lines.
        /// </summary>
        /// <param name="reader">Reader positioned at the alignment text.</param>
        /// <returns>
        /// A sequence alignment containing one aligned sequence, which holds one sequence per
        /// identifier seen in the first block, with the data from all blocks concatenated.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when there is no text to parse, the header does not start with "CLUSTAL",
        /// a data line has fewer than two tokens, the alphabet cannot be detected, sequences in
        /// the first block have different detected alphabets, or a later block uses an
        /// identifier that was not in the first block.
        /// </exception>
        private ISequenceAlignment ParseOne(StreamReader reader)
        {
            // no empty files allowed
            if (line == null)
            {
                ReadNextLine(reader);
            }

            if (line == null)
            {
                throw new InvalidDataException(Properties.Resource.IONoTextToParse);
            }

            if (!line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
            {
                throw new InvalidDataException(
                          string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            ReadNextLine(reader);  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            skipBlankLines = false;

            // Maps sequence id -> (placeholder sequence carrying alphabet and ID, accumulated symbol bytes).
            var       mapIdToSequence   = new Dictionary <string, Tuple <ISequence, List <byte> > >();
            IAlphabet alignmentAlphabet = null;
            bool      isFirstBlock      = true;
            bool      inBlock           = false;
            var       endOfBlockSymbols = new HashSet <char> {
                '*', ' ', '.', '+', ':'
            };

            // NOTE(review): the loop stops as soon as the reader has no more data, so the line
            // most recently read when the stream runs out is not processed — confirm this is intended.
            while (reader.Peek() != -1)
            {
                // Blank line or consensus line signals end of block.
                if (String.IsNullOrEmpty(line) || line.ToCharArray().All(endOfBlockSymbols.Contains))
                {
                    if (inBlock)
                    {
                        // Blank line signifies end of block
                        inBlock      = false;
                        isFirstBlock = false;
                    }
                }
                else // It's not a blank or consensus line.
                {
                    // It's a data line in a block.
                    // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore
                    string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); // (char[])null uses whitespace delimiters

                    // A data line needs at least an id and a segment; report malformed input as
                    // invalid data instead of failing with an index-out-of-range error.
                    if (tokens.Length < 2)
                    {
                        throw new InvalidDataException(
                                  string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
                    }

                    string   id       = tokens[0];
                    string   data     = tokens[1].ToUpperInvariant();
                    byte[]   byteData = Encoding.UTF8.GetBytes(data);
                    Tuple <ISequence, List <byte> > sequenceTuple;
                    IAlphabet alphabet = Alphabet;

                    inBlock = true;
                    if (isFirstBlock)
                    {
                        // The first block introduces every sequence id.
                        if (null == alphabet)
                        {
                            // No alphabet was configured: detect it from this line's symbols.
                            alphabet = Alphabets.AutoDetectAlphabet(byteData, 0, byteData.Length, alphabet);

                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                                                   CultureInfo.InvariantCulture,
                                                                   Properties.Resource.InvalidSymbolInString,
                                                                   data));
                            }

                            // All sequences in the alignment must share the detected alphabet.
                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else
                            {
                                if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                                                       CultureInfo.CurrentCulture,
                                                                       Properties.Resource.SequenceAlphabetMismatch));
                                }
                            }
                        }

                        sequenceTuple = new Tuple <ISequence, List <byte> >(
                            new Sequence(alphabet, "")
                        {
                            ID = id
                        },
                            new List <byte>());
                        sequenceTuple.Item2.AddRange(byteData);

                        mapIdToSequence.Add(id, sequenceTuple);
                    }
                    else
                    {
                        // Later blocks may only extend sequences introduced by the first block.
                        if (!mapIdToSequence.TryGetValue(id, out sequenceTuple))
                        {
                            throw new InvalidDataException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.ClustalUnknownSequence, id));
                        }

                        sequenceTuple.Item2.AddRange(byteData);
                    }
                }

                ReadNextLine(reader);
            }

            // Build the final sequences from the accumulated bytes of each id.
            var sequenceAlignment = new SequenceAlignment();
            var alignedSequence   = new AlignedSequence();

            sequenceAlignment.AlignedSequences.Add(alignedSequence);
            foreach (var alignmentSequenceTuple in mapIdToSequence.Values)
            {
                alignedSequence.Sequences.Add(
                    new Sequence(alignmentSequenceTuple.Item1.Alphabet, alignmentSequenceTuple.Item2.ToArray())
                {
                    ID = alignmentSequenceTuple.Item1.ID
                });
            }

            return(sequenceAlignment);
        }
        /// <summary>
        /// Executes Stage 1, 2, and 3 (see the class description) on the supplied sequences
        /// and returns the outcome as a one-item list of sequence alignments.
        /// </summary>
        /// <param name="inputSequences">Sequences to align; must yield at least one item.</param>
        /// <returns>
        /// A list containing a single alignment: its <c>Sequences</c> are the inputs and its only
        /// aligned-sequence entry contains each item of <c>AlignedSequences</c>.
        /// </returns>
        /// <exception cref="ArgumentException">No sequences were supplied.</exception>
        public IList<Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
        {
            // Start from a clean slate so the instance can be reused across calls.
            AlignedSequences = AlignedSequencesA = AlignedSequencesB = AlignedSequencesC = null;
            AlignmentScore = AlignmentScoreA = AlignmentScoreB = AlignmentScoreC = float.MinValue;

            // Snapshot the input into a list.
            List<ISequence> sequences = inputSequences.ToList();
            if (sequences.Count <= 0)
            {
                throw new ArgumentException("Empty input sequences");
            }

            // Zero means the gap costs were never assigned; fall back to defaults.
            if (GapOpenCost == 0)
            {
                GapOpenCost = -4;
            }

            if (GapExtensionCost == 0)
            {
                GapExtensionCost = -1;
            }

            StartLog();

            // Alphabet setup precedes the alignment work.
            SetAlphabet(sequences, SimilarityMatrix, true);
            MsaUtils.SetProfileItemSets(this.alphabet);

            ReportLog("Start Aligning");

            DoAlignment(sequences);

            // Shape the output like the pairwise aligners do, so both can be consumed the same way.
            var packaged = new Alignment.SequenceAlignment();
            IAlignedSequence merged = new AlignedSequence();

            foreach (var row in AlignedSequences)
            {
                merged.Sequences.Add(row);
            }

            foreach (var source in sequences)
            {
                packaged.Sequences.Add(source);
            }

            packaged.AlignedSequences.Add(merged);

            var output = new List<Alignment.ISequenceAlignment>();
            output.Add(packaged);
            return output;
        }
Example #7
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a reader.
        /// After the header, a line starting with "begin" opens block processing: TAXA blocks
        /// supply the sequence ids, CHARACTERS blocks supply the sequence data, an END block
        /// name stops processing, and any other block is skipped.
        /// </summary>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>
        /// The parsed ISequenceAlignment object, holding one aligned sequence. That aligned
        /// sequence is empty when no "begin" line follows the header.
        /// </returns>
        /// <exception cref="Exception">Thrown when no line can be read from the reader.</exception>
        /// <exception cref="InvalidDataException">
        /// Thrown when the alphabet of a sequence cannot be detected or when the sequences of a
        /// CHARACTERS block have different detected alphabets.
        /// </exception>
        ISequenceAlignment ParseOne(TextReader reader)
        {
            ReadNextLine(reader);
            if (line == null)
            {
                throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
            }

            this.ParseHeader(reader);

            var            alignedSequence = new AlignedSequence();
            IList <string> ids             = null;
            bool           isInBlock       = true;

            // The current line may be null once the header has been consumed (the loop below
            // already treats null as "no more input"), so guard before dereferencing it.
            if (this.line != null && this.line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
            {
                while (this.line != null && isInBlock)
                {
                    // Skip lines that contain only whitespace.
                    if (string.IsNullOrEmpty(this.line.Trim()))
                    {
                        this.ReadNextLine(reader);
                        continue;
                    }

                    // NOTE(review): assumes the line has at least two tokens ("begin <name>") — confirm GetTokens behavior.
                    string blockName = GetTokens(this.line)[1];

                    switch (blockName.ToUpperInvariant())
                    {
                    case "TAXA":
                    case "TAXA;":
                        // This block contains the count of sequence & title of each sequence
                        ids = this.ParseTaxaBlock(reader);
                        break;

                    case "CHARACTERS":
                    case "CHARACTERS;":
                        // Block contains sequences
                        // NOTE(review): ids is still null here if no TAXA block came first — verify callers/inputs.
                        Dictionary <string, string> dataSet = this.ParseCharacterBlock(reader, ids);
                        IAlphabet alignmentAlphabet         = null;

                        foreach (string id in ids)
                        {
                            IAlphabet alphabet = this.Alphabet;
                            string    data     = dataSet[id];

                            if (null == alphabet)
                            {
                                // No alphabet was configured: detect it from this sequence's symbols.
                                byte[] dataArray = data.ToByteArray();
                                alphabet = Alphabets.AutoDetectAlphabet(dataArray, 0, dataArray.Length, null);

                                if (null == alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                                                       CultureInfo.InvariantCulture,
                                                                       Properties.Resource.InvalidSymbolInString,
                                                                       data));
                                }

                                // All sequences of the block must share the detected alphabet.
                                if (null == alignmentAlphabet)
                                {
                                    alignmentAlphabet = alphabet;
                                }
                                else
                                {
                                    if (alignmentAlphabet != alphabet)
                                    {
                                        throw new InvalidDataException(string.Format(
                                                                           CultureInfo.InvariantCulture,
                                                                           Properties.Resource.SequenceAlphabetMismatch));
                                    }
                                }
                            }

                            alignedSequence.Sequences.Add(new Sequence(alphabet, data)
                            {
                                ID = id
                            });
                        }

                        break;

                    case "END":
                    case "END;":
                        // Have reached the end of block
                        isInBlock = false;
                        break;

                    default:
                        // skip this block: read until a line equal to "end;" (ignoring case) or until input runs out
                        while (this.line != null)
                        {
                            this.ReadNextLine(reader);
                            if (0 == string.Compare(this.line, "end;", StringComparison.OrdinalIgnoreCase))
                            {
                                break;
                            }
                        }
                        break;
                    }

                    this.ReadNextLine(reader);
                }
            }

            ISequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(alignedSequence);
            return(sequenceAlignment);
        }