Exemplo n.º 1
0
 /// <summary>
 /// Initializes a new instance of the PairwiseSequenceAlignment class
 /// that holds the two supplied input sequences and starts with an
 /// empty list of pairwise aligned sequences.
 /// </summary>
 /// <param name="firstSequence">First input sequence.</param>
 /// <param name="secondSequence">Second input sequence.</param>
 public PairwiseSequenceAlignment(ISequence firstSequence, ISequence secondSequence)
 {
     // Register both inputs, in order, on a fresh underlying alignment.
     this.seqAlignment = new SequenceAlignment();
     this.seqAlignment.Sequences.Add(firstSequence);
     this.seqAlignment.Sequences.Add(secondSequence);

     // No aligned results exist yet.
     this.alignedSequences = new List<PairwiseAlignedSequence>();
 }
Exemplo n.º 2
0
        /// <summary>
        /// Initializes a new instance of the PairwiseSequenceAlignment class
        /// from previously serialized state.
        /// </summary>
        /// <param name="info">Serialization Info.</param>
        /// <param name="context">Streaming context.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
        protected PairwiseSequenceAlignment(SerializationInfo info, StreamingContext context)
        {
            if (null == info)
            {
                throw new ArgumentNullException("info");
            }

            // Restore each stored value by its serialization key.
            object storedAlignedSequences = info.GetValue("AlignedSeqs", typeof(List<PairwiseAlignedSequence>));
            this._alignedSequences = (List<PairwiseAlignedSequence>)storedAlignedSequences;

            object storedAlignment = info.GetValue("base", typeof(SequenceAlignment));
            this._seqAlignment = (SequenceAlignment)storedAlignment;

            this.Documentation = info.GetValue("Doc", typeof(object));
            this.IsReadOnly = info.GetBoolean("IsReadOnly");
        }
Exemplo n.º 3
0
        /// <summary>
        /// Initializes a new instance of the PairwiseSequenceAlignment class.
        /// Internal constructor that builds the instance from an existing
        /// ISequenceAlignment: the alignment itself is copied and each of its
        /// aligned sequences is wrapped in a PairwiseAlignedSequence.
        /// </summary>
        /// <param name="seqAlignment">ISequenceAlignment instance.</param>
        internal PairwiseSequenceAlignment(ISequenceAlignment seqAlignment)
        {
            this._seqAlignment = new SequenceAlignment(seqAlignment);

            // Wrap every aligned sequence of the source alignment.
            this._alignedSequences = new List<PairwiseAlignedSequence>();
            foreach (AlignedSequence sourceAligned in seqAlignment.AlignedSequences)
            {
                PairwiseAlignedSequence pairwise = new PairwiseAlignedSequence(sourceAligned);
                this._alignedSequences.Add(pairwise);
            }

            // The aligned sequences kept by _seqAlignment are no longer needed;
            // they can only be dropped when the list is writable.
            if (this._seqAlignment.AlignedSequences.IsReadOnly)
            {
                return;
            }

            this._seqAlignment.AlignedSequences.Clear();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a reader.
        /// </summary>
        /// <remarks>
        /// The first line must hold exactly two whitespace-separated integers: the
        /// number of sequences and the length of each sequence. In the first block
        /// every line starts with a 10 character sequence ID followed by sequence
        /// data; lines of later blocks hold sequence data only and are appended to
        /// the sequences in the order in which they were first listed. Space
        /// characters inside the sequence data are removed.
        /// </remarks>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>The parsed ISequenceAlignment object.</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when there is no text to parse, the first line does not hold exactly
        /// two tokens, a line of the first block is 10 characters or shorter, the input
        /// ends in the middle of a block, the alphabet cannot be detected or differs
        /// between sequences, or the number or length of the parsed sequences does not
        /// match the values given on the first line.
        /// </exception>
        ISequenceAlignment ParseOne(TextReader reader)
        {
            if (line == null)
                ReadNextLine(reader);

            // no empty files allowed
            if (line == null)
            {
                throw new InvalidDataException(Properties.Resource.IONoTextToParse);
            }

            // Parse first line: "<sequence count> <sequence length>"
            IList<string> tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            if (2 != tokens.Count)
            {
                throw new InvalidDataException(
                    string.Format(CultureInfo.CurrentCulture, 
                        Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            bool isFirstBlock = true;
            IList<Tuple<Sequence, List<byte>>> data = new List<Tuple<Sequence, List<byte>>>();
            IAlphabet alignmentAlphabet = null;

            int sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
            int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

            ReadNextLine(reader);  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            skipBlankLines = false;

            // NOTE(review): the loop is only entered while the reader still has unread input.
            // With a sequence count of 1, a block that consists of the very last line of the
            // input looks like it would never be processed - confirm against ReadNextLine.
            while (reader.Peek() != -1)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    ReadNextLine(reader);
                    continue;
                }

                for (int index = 0; index < sequenceCount; index++)
                {
                    // The input ended before every sequence of this block was read;
                    // report it as invalid data instead of failing on a null line.
                    if (line == null)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.CurrentCulture, 
                            Properties.Resource.INVALID_INPUT_FILE, this.Name));
                    }

                    if (isFirstBlock)
                    {
                        // First 10 characters are sequence ID, remaining is the first block of sequence
                        // Note that both may contain whitespace, and there may be no whitespace between them.
                        if (line.Length <= 10)
                        {
                            throw new InvalidDataException(string.Format(
                                CultureInfo.CurrentCulture, 
                                Properties.Resource.INVALID_INPUT_FILE, this.Name));
                        }
                        string id = line.Substring(0, 10).Trim();
                        string sequenceString = line.Substring(10).Replace(" ","");
                        byte[] sequenceBytes = Encoding.UTF8.GetBytes(sequenceString);

                        // Use the configured alphabet when there is one; otherwise detect it
                        // from the data and require all sequences to agree on it.
                        IAlphabet alphabet = Alphabet;
                        if (null == alphabet)
                        {
                            alphabet = Alphabets.AutoDetectAlphabet(sequenceBytes, 0, sequenceBytes.Length, alphabet);

                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                        CultureInfo.InvariantCulture,
                                        Properties.Resource.InvalidSymbolInString,
                                        sequenceString));
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else
                            {
                                if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(Properties.Resource.SequenceAlphabetMismatch);
                                }
                            }
                        }

                        // Item1 only carries the alphabet and ID; the symbols are accumulated in Item2.
                        var sequenceStore = new Tuple<Sequence, List<byte>>(
                            new Sequence(alphabet, string.Empty){ ID = id }, 
                            new List<byte>());

                        sequenceStore.Item2.AddRange(sequenceBytes);
                        data.Add(sequenceStore);
                    }
                    else
                    {
                        // Later blocks: the line at position "index" continues the sequence at the same position.
                        Tuple<Sequence, List<byte>> sequence = data[index];
                        byte[] sequenceBytes = Encoding.UTF8.GetBytes(line.Replace(" ",""));
                        sequence.Item2.AddRange(sequenceBytes);
                    }

                    ReadNextLine(reader);
                }

                // Reset the first block flag
                isFirstBlock = false;
            }

            // Validate for the count of sequence
            if (sequenceCount != data.Count)
            {
                throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
            }

            SequenceAlignment sequenceAlignment = new SequenceAlignment();
            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

            foreach (var dataSequence in data)
            {
                // Validate the length of each sequence against the value from the first line
                if (sequenceLength != dataSequence.Item2.Count)
                {
                    throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
                }

                sequenceAlignment.AlignedSequences[0].Sequences.Add(
                    new Sequence(dataSequence.Item1.Alphabet, dataSequence.Item2.ToArray()) { ID = dataSequence.Item1.ID });
            }

            return sequenceAlignment;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Initializes a new, empty instance of the PairwiseSequenceAlignment class
 /// backed by a fresh SequenceAlignment.
 /// </summary>
 public PairwiseSequenceAlignment()
 {
     this.seqAlignment = new SequenceAlignment();

     // Already false by default; assigned explicitly so the intent is visible.
     this.IsReadOnly = false;
 }
Exemplo n.º 6
0
 /// <summary>
 /// Initializes a new instance of the PairwiseSequenceAlignment class
 /// from two input sequences, with an empty list of pairwise aligned sequences.
 /// </summary>
 /// <param name="firstSequence">First input sequence.</param>
 /// <param name="secondSequence">Second input sequence.</param>
 public PairwiseSequenceAlignment(ISequence firstSequence, ISequence secondSequence)
 {
     // Both inputs are handed to the underlying alignment in the order given.
     ISequence[] inputSequences = { firstSequence, secondSequence };

     this.seqAlignment = new SequenceAlignment(inputSequences);
     this.alignedSequences = new List<PairwiseAlignedSequence>();

     // Already false by default; assigned explicitly so the intent is visible.
     this.IsReadOnly = false;
 }
Exemplo n.º 7
0
        /// <summary>
        /// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the
        /// aligned sequences and metadata of the first result into a new
        /// SequenceAlignment and compares its ToString() output, with line breaks
        /// removed, against the expected text looked up through xmlUtil.
        /// </summary>
        public void ValidateSequenceAlignmentToString()
        {
            ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
            IAlphabet proteinAlphabet = Alphabets.Protein;
            const string firstProtein = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
            const string secondProtein = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

            // Create input sequences
            var inputSequences = new List<ISequence>
            {
                new Sequence(proteinAlphabet, firstProtein),
                new Sequence(proteinAlphabet, secondProtein)
            };

            // Only the first alignment returned by the aligner is inspected.
            ISequenceAlignment source = aligner.Align(inputSequences)[0];

            // Copy the aligned sequences and the metadata into a new alignment.
            ISequenceAlignment alignment = new SequenceAlignment();
            foreach (var aligned in source.AlignedSequences)
            {
                alignment.AlignedSequences.Add(aligned);
            }

            foreach (string key in source.Metadata.Keys)
            {
                alignment.Metadata.Add(key, source.Metadata[key]);
            }

            string actualText = alignment.ToString();
            string expectedText = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.SequenceAlignmentExpectedNode);

            // Line breaks are stripped on both sides before comparing.
            string expectedWithoutBreaks = expectedText.Replace("\\r\\n", "");
            string actualWithoutBreaks = actualText.Replace(System.Environment.NewLine, "");
            Assert.AreEqual(expectedWithoutBreaks, actualWithoutBreaks);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a reader.
        /// </summary>
        /// <remarks>
        /// After the header is parsed, the blocks are walked one by one, keyed by the
        /// second token of the current line: a TAXA block supplies the sequence IDs,
        /// a CHARACTERS block supplies the sequence data for those IDs, END stops
        /// parsing, and any other block is skipped up to its "end;" line.
        /// </remarks>
        /// <param name="reader">A reader for a biological sequence alignment text.</param>
        /// <returns>The parsed ISequenceAlignment object.</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when there is no text to parse, a CHARACTERS block is reached before
        /// any sequence IDs were read, the alphabet of a sequence cannot be detected,
        /// or the detected alphabets of the sequences differ.
        /// </exception>
        ISequenceAlignment ParseOne(TextReader reader)
        {
            ReadNextLine(reader);
            if (line == null)
            {
                throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
            }

            this.ParseHeader(reader);

            var alignedSequence = new AlignedSequence();
            IList<string> ids = null;
            bool isInBlock = true;

            // The input may end right after the header, so guard against a null line.
            if (this.line != null && this.line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
            {
                while (this.line != null && isInBlock)
                {
                    // Blank lines between blocks are ignored.
                    if (string.IsNullOrEmpty(this.line.Trim()))
                    {
                        this.ReadNextLine(reader);
                        continue;
                    }

                    string blockName = GetTokens(this.line)[1];

                    switch (blockName.ToUpperInvariant())
                    {
                        case "TAXA":
                        case "TAXA;":
                            // This block contains the count of sequence & title of each sequence
                            ids = this.ParseTaxaBlock(reader);
                            break;

                        case "CHARACTERS":
                        case "CHARACTERS;":
                            // Block contains sequences
                            Dictionary<string, string> dataSet = this.ParseCharacterBlock(reader, ids);
                            IAlphabet alignmentAlphabet = null;

                            // Without sequence IDs the data cannot be matched to sequences.
                            if (ids == null)
                            {
                                throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
                            }

                            foreach (string id in ids)
                            {
                                // Use the configured alphabet when there is one; otherwise detect it
                                // from the data and require all sequences to agree on it.
                                IAlphabet alphabet = this.Alphabet;
                                string data = dataSet[id];

                                if (null == alphabet)
                                {
                                    byte[] dataArray = data.ToByteArray();
                                    alphabet = Alphabets.AutoDetectAlphabet(dataArray, 0, dataArray.Length, null);

                                    if (null == alphabet)
                                    {
                                        throw new InvalidDataException(string.Format(
                                            CultureInfo.InvariantCulture,
                                            Properties.Resource.InvalidSymbolInString,
                                            data));
                                    }
                                    
                                    if (null == alignmentAlphabet)
                                    {
                                        alignmentAlphabet = alphabet;
                                    }
                                    else
                                    {
                                        if (alignmentAlphabet != alphabet)
                                        {
                                            throw new InvalidDataException(Properties.Resource.SequenceAlphabetMismatch);
                                        }
                                    }
                                }

                                alignedSequence.Sequences.Add(new Sequence(alphabet, data) { ID = id });
                            }

                            break;

                        case "END":
                        case "END;":
                            // Have reached the end of block
                            isInBlock = false;
                            break;

                        default:
                            // skip this block: read on until its "end;" line or the end of the input
                            while (this.line != null)
                            {
                                this.ReadNextLine(reader);
                                if (0 == string.Compare(this.line, "end;", StringComparison.OrdinalIgnoreCase))
                                {
                                    break;
                                }
                            }
                            break;
                    }

                    // Move on to the line that follows the block just handled.
                    this.ReadNextLine(reader);
                }
            }

            ISequenceAlignment sequenceAlignment = new SequenceAlignment();
            sequenceAlignment.AlignedSequences.Add(alignedSequence);
            return sequenceAlignment;
        }
Exemplo n.º 9
0
 /// <summary>
 /// Initializes a new instance of the PairwiseSequenceAlignment class
 /// that contains no sequences: only the underlying SequenceAlignment
 /// is created.
 /// </summary>
 public PairwiseSequenceAlignment()
 {
     this.seqAlignment = new SequenceAlignment();

     // The default is already false; the assignment just makes that explicit.
     this.IsReadOnly = false;
 }
Exemplo n.º 10
0
 /// <summary>
 /// Initializes a new instance of the PairwiseSequenceAlignment class
 /// for a pair of input sequences. The list of pairwise aligned
 /// sequences starts out empty.
 /// </summary>
 /// <param name="firstSequence">First input sequence.</param>
 /// <param name="secondSequence">Second input sequence.</param>
 public PairwiseSequenceAlignment(ISequence firstSequence, ISequence secondSequence)
 {
     // The pair is passed on as an array, first sequence first.
     ISequence[] sequencePair = { firstSequence, secondSequence };
     this.seqAlignment = new SequenceAlignment(sequencePair);

     this.alignedSequences = new List<PairwiseAlignedSequence>();

     // The default is already false; the assignment just makes that explicit.
     this.IsReadOnly = false;
 }
Exemplo n.º 11
0
        /// <summary>
        /// Parses a single biological sequence alignment text from a stream.
        /// </summary>
        /// <remarks>
        /// The first line must start with "CLUSTAL". Data lines hold a sequence ID
        /// followed by a sequence segment (upper-cased before use); any further
        /// tokens on the line are ignored. A blank line, or a line made up only of
        /// the consensus symbols '*', ' ', '.', '+' and ':', ends the current block.
        /// Sequences are registered in the first block; later blocks append to them by ID.
        /// </remarks>
        /// <param name="reader">Reader</param>
        /// <returns>The parsed alignment, holding one aligned sequence with every parsed sequence.</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when there is no text to parse, the first line does not start with
        /// "CLUSTAL", a data line has fewer than two tokens, the alphabet cannot be
        /// detected or differs between sequences, or a later block refers to an ID
        /// that was not part of the first block.
        /// </exception>
        private ISequenceAlignment ParseOne(StreamReader reader)
        {
            // no empty files allowed
            if (line == null)
                ReadNextLine(reader);

            if (line == null)
                throw new InvalidDataException(Properties.Resource.IONoTextToParse);

            if (!line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
            {
                throw new InvalidDataException(
                    string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            ReadNextLine(reader);  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            skipBlankLines = false;

            var mapIdToSequence = new Dictionary<string, Tuple<ISequence, List<byte>>>();
            IAlphabet alignmentAlphabet = null;
            bool isFirstBlock = true;
            bool inBlock = false;
            var endOfBlockSymbols = new HashSet<char> { '*', ' ', '.', '+', ':' };

            // NOTE(review): the condition is checked before the current line is handled, so the
            // line loaded by the final ReadNextLine call looks like it is never processed once the
            // reader is exhausted - confirm that inputs always end with a blank or consensus line.
            while (reader.Peek() != -1)
            {
                // Blank line or consensus line signals end of block.
                if (String.IsNullOrEmpty(line) || line.All(endOfBlockSymbols.Contains))
                {
                    if (inBlock)
                    {
                        // Blank line signifies end of block
                        inBlock = false;
                        isFirstBlock = false;
                    }
                }
                else // It's not a blank or consensus line.
                {
                    // It's a data line in a block.
                    // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore
                    string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); // (char[])null uses whitespace delimiters

                    // A data line needs at least an ID and a segment; report anything
                    // shorter as invalid data instead of failing on the index below.
                    if (tokens.Length < 2)
                    {
                        throw new InvalidDataException(
                            string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
                    }

                    string id = tokens[0];
                    string data = tokens[1].ToUpperInvariant();
                    byte[] byteData = Encoding.UTF8.GetBytes(data);
                    Tuple<ISequence, List<byte>> sequenceTuple;
                    IAlphabet alphabet = Alphabet;

                    inBlock = true;
                    if (isFirstBlock)
                    {
                        // Use the configured alphabet when there is one; otherwise detect it
                        // from the data and require all sequences to agree on it.
                        if (null == alphabet)
                        {
                            alphabet = Alphabets.AutoDetectAlphabet(byteData, 0, byteData.Length, alphabet);

                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                        CultureInfo.InvariantCulture,
                                        Properties.Resource.InvalidSymbolInString,
                                        data));
                            }
                            
                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else
                            {
                                if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                        CultureInfo.CurrentCulture,
                                        Properties.Resource.SequenceAlphabetMismatch));
                                }
                            }
                        }

                        // Item1 only carries the alphabet and ID; the symbols are accumulated in Item2.
                        sequenceTuple = new Tuple<ISequence, List<byte>>(
                            new Sequence(alphabet, "") { ID = id }, 
                            new List<byte>());
                        sequenceTuple.Item2.AddRange(byteData);

                        mapIdToSequence.Add(id, sequenceTuple);
                    }
                    else
                    {
                        // Later blocks may only continue sequences registered in the first block.
                        if (!mapIdToSequence.TryGetValue(id, out sequenceTuple))
                        {
                            throw new InvalidDataException(string.Format(CultureInfo.CurrentCulture, Properties.Resource.ClustalUnknownSequence, id));
                        }

                        sequenceTuple.Item2.AddRange(byteData);
                    }
                }

                ReadNextLine(reader);
            }

            // Build the final sequences from the accumulated symbols.
            var sequenceAlignment = new SequenceAlignment();
            var alignedSequence = new AlignedSequence();
            sequenceAlignment.AlignedSequences.Add(alignedSequence);
            foreach (var alignmentSequenceTuple in mapIdToSequence.Values)
            {
                alignedSequence.Sequences.Add(
                    new Sequence(alignmentSequenceTuple.Item1.Alphabet, alignmentSequenceTuple.Item2.ToArray()) 
                    { 
                        ID = alignmentSequenceTuple.Item1.ID 
                    });
            }

            return sequenceAlignment;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the
        /// aligned sequences of the first result into a new SequenceAlignment and
        /// checks that its ToString() output equals the expected three-line text.
        /// </summary>
        public void TestSequenceAlignmentToString()
        {
            ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
            IAlphabet proteinAlphabet = Alphabets.Protein;
            const string firstProtein = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
            const string secondProtein = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

            // Create input sequences
            var inputSequences = new List<ISequence>();
            inputSequences.Add(new Sequence(proteinAlphabet, firstProtein));
            inputSequences.Add(new Sequence(proteinAlphabet, secondProtein));

            // Only the first alignment returned by the aligner is inspected.
            ISequenceAlignment firstResult = aligner.Align(inputSequences)[0];

            ISequenceAlignment alignment = new SequenceAlignment();
            foreach (var aligned in firstResult.AlignedSequences)
            {
                alignment.AlignedSequences.Add(aligned);
            }

            const string expected = "XXIPXXXXLXXXXXXFXXXXXXLSXXLHN\r\n" +
                                    "KRIPKSQNLRSIHSIFPFLEDKLSHL--N\r\n" +
                                    "LNIPSLITLNKSIYVFSKRKKRLSGFLHN\r\n\r\n";

            // The expected text is written with "\r\n"; convert it to the platform line break.
            string expectedForPlatform = expected.Replace("\r\n", Environment.NewLine);
            Assert.AreEqual(expectedForPlatform, alignment.ToString());
        }
Exemplo n.º 13
0
        /// <summary>
        ///     Validates a SequenceAlignment that is populated from the first result of
        ///     the given aligner: the first sequence of every aligned sequence and every
        ///     metadata entry must match the aligner's result.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="aligner">sw/nw/pw aligners</param>
        private void ValidateSequenceAlignmentCtor(string nodeName, ISequenceAligner aligner)
        {
            // Look up the alphabet and both sequence texts under the given node.
            string alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
            IAlphabet alphabet = Utility.GetAlphabet(alphabetName);
            string firstText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string secondText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

            // Create input sequences
            var inputSequences = new List<ISequence>
            {
                new Sequence(alphabet, firstText),
                new Sequence(alphabet, secondText)
            };

            // Only the first alignment returned by the aligner is inspected.
            ISequenceAlignment source = aligner.Align(inputSequences)[0];

            // Copy the aligned sequences and the metadata into a new alignment.
            ISequenceAlignment alignment = new SequenceAlignment();
            foreach (var aligned in source.AlignedSequences)
            {
                alignment.AlignedSequences.Add(aligned);
            }

            foreach (string key in source.Metadata.Keys)
            {
                alignment.Metadata.Add(key, source.Metadata[key]);
            }

            // Validate the properties
            for (int position = 0; position < source.AlignedSequences.Count; position++)
            {
                string expectedFirst = source.AlignedSequences[position].Sequences[0].ToString();
                string actualFirst = alignment.AlignedSequences[position].Sequences[0].ToString();
                Assert.AreEqual(expectedFirst, actualFirst);
            }

            foreach (string key in source.Metadata.Keys)
            {
                Assert.AreEqual(source.Metadata[key], alignment.Metadata[key]);
            }

            ApplicationLog.WriteLine(@"Alignment BVT : Validation of sequence alignment  ctor completed successfully");
        }