/// <summary>
/// Initializes a new instance of the PairwiseSequenceAlignment class
/// that holds the two supplied input sequences.
/// </summary>
/// <param name="firstSequence">First input sequence.</param>
/// <param name="secondSequence">Second input sequence.</param>
public PairwiseSequenceAlignment(ISequence firstSequence, ISequence secondSequence)
{
    // Register both inputs, in order, on a fresh underlying alignment.
    seqAlignment = new SequenceAlignment
    {
        Sequences = { firstSequence, secondSequence }
    };

    // Start with an empty list of pairwise aligned sequences.
    alignedSequences = new List<PairwiseAlignedSequence>();
}
/// <summary>
/// Initializes a new instance of the PairwiseSequenceAlignment class
/// from previously serialized state (deserialization constructor).
/// </summary>
/// <param name="info">Serialization Info holding the stored values.</param>
/// <param name="context">Streaming context (not read by this constructor).</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
protected PairwiseSequenceAlignment(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    // Aligned sequences are stored under the "AlignedSeqs" entry.
    Type alignedListType = typeof(List<PairwiseAlignedSequence>);
    object storedAligned = info.GetValue("AlignedSeqs", alignedListType);
    _alignedSequences = (List<PairwiseAlignedSequence>)storedAligned;

    // The wrapped alignment is stored under the "base" entry.
    object storedBase = info.GetValue("base", typeof(SequenceAlignment));
    _seqAlignment = (SequenceAlignment)storedBase;

    Documentation = info.GetValue("Doc", typeof(object));
    IsReadOnly = info.GetBoolean("IsReadOnly");
}
/// <summary>
/// Initializes a new instance of the PairwiseSequenceAlignment class.
/// Internal constructor that builds the instance from an existing ISequenceAlignment:
/// the alignment is copied, and each of its aligned sequences is wrapped in a
/// PairwiseAlignedSequence.
/// </summary>
/// <param name="seqAlignment">ISequenceAlignment instance.</param>
internal PairwiseSequenceAlignment(ISequenceAlignment seqAlignment)
{
    _seqAlignment = new SequenceAlignment(seqAlignment);

    // Wrap every aligned sequence of the source alignment.
    var wrappedSequences = new List<PairwiseAlignedSequence>();
    foreach (AlignedSequence sourceSequence in seqAlignment.AlignedSequences)
    {
        PairwiseAlignedSequence wrapped = new PairwiseAlignedSequence(sourceSequence);
        wrappedSequences.Add(wrapped);
    }

    _alignedSequences = wrappedSequences;

    // The aligned sequences held by the copied alignment are no longer needed,
    // so drop them when the collection allows modification.
    bool canClear = !_seqAlignment.AlignedSequences.IsReadOnly;
    if (canClear)
    {
        _seqAlignment.AlignedSequences.Clear();
    }
}
/// <summary>
/// Parses a single biological sequence alignment text from a reader.
/// The header line must contain exactly two whitespace-separated integers: the number of
/// sequences and the length of each sequence. In the first block every line carries a
/// 10-character sequence ID followed by sequence data; lines of later blocks carry
/// sequence data only. Spaces inside the sequence data are removed.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>The parsed ISequenceAlignment object.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when there is no text to parse, the header is malformed, a first-block line is
/// too short to contain an ID and data, the input ends in the middle of a block, the
/// alphabet cannot be detected or differs between sequences, or the sequence count or
/// sequence length does not match the header.
/// </exception>
ISequenceAlignment ParseOne(TextReader reader)
{
    if (line == null)
    {
        ReadNextLine(reader);
    }

    // no empty files allowed
    if (line == null)
    {
        throw new InvalidDataException(Properties.Resource.IONoTextToParse);
    }

    // Parse first line
    IList<string> tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    if (2 != tokens.Count)
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
    }

    bool isFirstBlock = true;
    int sequenceCount;
    int sequenceLength;
    IList<Tuple<Sequence, List<byte>>> data = new List<Tuple<Sequence, List<byte>>>();
    IAlphabet alignmentAlphabet = null;

    sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
    sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

    ReadNextLine(reader); // Skip blank lines until we get to the first block.

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    skipBlankLines = false;

    while (reader.Peek() != -1)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            ReadNextLine(reader);
            continue;
        }

        // A data line is present but the header declares no sequences. Without this check
        // the for loop below never runs, the current line is never advanced, and this
        // while loop would spin forever.
        if (sequenceCount <= 0)
        {
            throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
        }

        for (int index = 0; index < sequenceCount; index++)
        {
            // The input ended in the middle of a block (fewer lines than declared sequences).
            if (line == null)
            {
                throw new InvalidDataException(string.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.INVALID_INPUT_FILE,
                    this.Name));
            }

            if (isFirstBlock)
            {
                // First 10 characters are sequence ID, remaining is the first block of sequence
                // Note that both may contain whitespace, and there may be no whitespace between them.
                if (line.Length <= 10)
                {
                    throw new InvalidDataException(string.Format(
                        CultureInfo.CurrentCulture,
                        Properties.Resource.INVALID_INPUT_FILE,
                        this.Name));
                }

                string id = line.Substring(0, 10).Trim();
                string sequenceString = line.Substring(10).Replace(" ", "");
                byte[] sequenceBytes = Encoding.UTF8.GetBytes(sequenceString);

                IAlphabet alphabet = Alphabet;
                if (null == alphabet)
                {
                    // No alphabet was configured: detect it from the data and require
                    // that every sequence of the alignment resolves to the same one.
                    alphabet = Alphabets.AutoDetectAlphabet(sequenceBytes, 0, sequenceBytes.Length, alphabet);

                    if (null == alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.InvariantCulture,
                            Properties.Resource.InvalidSymbolInString,
                            sequenceString));
                    }

                    if (null == alignmentAlphabet)
                    {
                        alignmentAlphabet = alphabet;
                    }
                    else
                    {
                        if (alignmentAlphabet != alphabet)
                        {
                            throw new InvalidDataException(Properties.Resource.SequenceAlphabetMismatch);
                        }
                    }
                }

                // Item1 carries the alphabet and ID; Item2 accumulates the bytes across blocks.
                var sequenceStore = new Tuple<Sequence, List<byte>>(
                    new Sequence(alphabet, string.Empty) { ID = id },
                    new List<byte>());

                sequenceStore.Item2.AddRange(sequenceBytes);
                data.Add(sequenceStore);
            }
            else
            {
                // Later blocks: lines appear in the same order as in the first block.
                Tuple<Sequence, List<byte>> sequence = data[index];
                byte[] sequenceBytes = Encoding.UTF8.GetBytes(line.Replace(" ", ""));
                sequence.Item2.AddRange(sequenceBytes);
            }

            ReadNextLine(reader);
        }

        // Reset the first block flag
        isFirstBlock = false;
    }

    // Validate the number of sequences against the header
    if (sequenceCount != data.Count)
    {
        throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
    }

    SequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

    foreach (var dataSequence in data)
    {
        // Validate the length of each sequence against the header
        if (sequenceLength != dataSequence.Item2.Count)
        {
            throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
        }

        sequenceAlignment.AlignedSequences[0].Sequences.Add(
            new Sequence(dataSequence.Item1.Alphabet, dataSequence.Item2.ToArray()) { ID = dataSequence.Item1.ID });
    }

    return sequenceAlignment;
}
/// <summary>
/// Initializes a new, empty instance of the PairwiseSequenceAlignment class.
/// </summary>
public PairwiseSequenceAlignment()
{
    this.seqAlignment = new SequenceAlignment();

    // false is already the default value; it is assigned explicitly for clarity.
    this.IsReadOnly = false;
}
/// <summary>
/// Initializes a new instance of the PairwiseSequenceAlignment class
/// from the two supplied input sequences.
/// </summary>
/// <param name="firstSequence">First input sequence.</param>
/// <param name="secondSequence">Second input sequence.</param>
public PairwiseSequenceAlignment(ISequence firstSequence, ISequence secondSequence)
{
    // Hand both inputs, in order, to the underlying alignment.
    ISequence[] inputSequences = { firstSequence, secondSequence };
    this.seqAlignment = new SequenceAlignment(inputSequences);

    this.alignedSequences = new List<PairwiseAlignedSequence>();

    // false is already the default value; it is assigned explicitly for clarity.
    this.IsReadOnly = false;
}
/// <summary>
/// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the aligned
/// sequences and metadata of the first result into a new SequenceAlignment, and compares
/// its ToString() output (line breaks removed) with the expected text read from the
/// XML configuration.
/// </summary>
public void ValidateSequenceAlignmentToString()
{
    ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
    IAlphabet alphabet = Alphabets.Protein;

    // Create input sequences
    var inputSequences = new List<ISequence>
    {
        new Sequence(alphabet, "KRIPKSQNLRSIHSIFPFLEDKLSHLN"),
        new Sequence(alphabet, "LNIPSLITLNKSIYVFSKRKKRLSGFLHN")
    };

    // Get aligned sequences
    IList<ISequenceAlignment> alignments = aligner.Align(inputSequences);
    ISequenceAlignment source = alignments[0];

    // Copy aligned sequences and metadata of the first result into a new alignment
    ISequenceAlignment alignment = new SequenceAlignment();
    foreach (var alignedSequence in source.AlignedSequences)
    {
        alignment.AlignedSequences.Add(alignedSequence);
    }

    foreach (string key in source.Metadata.Keys)
    {
        alignment.Metadata.Add(key, source.Metadata[key]);
    }

    string actualText = alignment.ToString();
    string expectedText = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.SequenceAlignmentExpectedNode);

    // Strip the escaped line-break text from the expected value and the real
    // line breaks from the actual value before comparing.
    string expectedFlat = expectedText.Replace("\\r\\n", "");
    string actualFlat = actualText.Replace(System.Environment.NewLine, "");
    Assert.AreEqual(expectedFlat, actualFlat);
}
/// <summary>
/// Parses a single biological sequence alignment text from a reader.
/// After the header, the text is processed as a series of "begin &lt;name&gt;" blocks:
/// the TAXA block supplies the sequence IDs, the CHARACTERS block supplies the sequence
/// data for those IDs, END stops the parsing, and any other block is skipped up to its
/// "end;" line.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>
/// The parsed ISequenceAlignment object. It always contains exactly one aligned sequence,
/// which has no sequences when no CHARACTERS block was read.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when the reader has no text, when the alphabet of a sequence cannot be detected,
/// or when the detected alphabets of the sequences differ.
/// </exception>
ISequenceAlignment ParseOne(TextReader reader)
{
    ReadNextLine(reader);
    if (line == null)
    {
        // InvalidDataException (rather than the bare Exception base type) keeps this
        // failure consistent with the other parse errors raised by this method.
        throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
    }

    this.ParseHeader(reader);

    var alignedSequence = new AlignedSequence();
    IList<string> ids = null;
    bool isInBlock = true;

    // The header may have consumed the remaining input, leaving no current line.
    if (this.line != null && this.line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
    {
        while (this.line != null && isInBlock)
        {
            if (string.IsNullOrEmpty(this.line.Trim()))
            {
                this.ReadNextLine(reader);
                continue;
            }

            // The second token of the line is used as the block name.
            string blockName = GetTokens(this.line)[1];

            switch (blockName.ToUpperInvariant())
            {
                case "TAXA":
                case "TAXA;":
                    // This block contains the count of sequence & title of each sequence
                    ids = this.ParseTaxaBlock(reader);
                    break;

                case "CHARACTERS":
                case "CHARACTERS;":
                    // Block contains sequences
                    Dictionary<string, string> dataSet = this.ParseCharacterBlock(reader, ids);

                    IAlphabet alignmentAlphabet = null;

                    foreach (string id in ids)
                    {
                        IAlphabet alphabet = this.Alphabet;
                        string data = dataSet[id];

                        if (null == alphabet)
                        {
                            // No alphabet was configured: detect it from the data and require
                            // that every sequence resolves to the same alphabet.
                            byte[] dataArray = data.ToByteArray();
                            alphabet = Alphabets.AutoDetectAlphabet(dataArray, 0, dataArray.Length, null);

                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture,
                                    Properties.Resource.InvalidSymbolInString,
                                    data));
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else
                            {
                                if (alignmentAlphabet != alphabet)
                                {
                                    throw new InvalidDataException(string.Format(
                                        CultureInfo.InvariantCulture,
                                        Properties.Resource.SequenceAlphabetMismatch));
                                }
                            }
                        }

                        alignedSequence.Sequences.Add(new Sequence(alphabet, data) { ID = id });
                    }

                    break;

                case "END":
                case "END;":
                    // Have reached the end of block
                    isInBlock = false;
                    break;

                default:
                    // skip this block
                    while (this.line != null)
                    {
                        this.ReadNextLine(reader);
                        if (0 == string.Compare(this.line, "end;", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }
                    }

                    break;
            }

            this.ReadNextLine(reader);
        }
    }

    ISequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(alignedSequence);
    return sequenceAlignment;
}
/// <summary>
/// Initializes a new, empty instance of the PairwiseSequenceAlignment class.
/// </summary>
public PairwiseSequenceAlignment()
{
    this.seqAlignment = new SequenceAlignment();

    // false is already the default value; it is assigned explicitly for clarity.
    this.IsReadOnly = false;
}
/// <summary>
/// Initializes a new instance of the PairwiseSequenceAlignment class
/// from the two supplied input sequences.
/// </summary>
/// <param name="firstSequence">First input sequence.</param>
/// <param name="secondSequence">Second input sequence.</param>
public PairwiseSequenceAlignment(ISequence firstSequence, ISequence secondSequence)
{
    // Hand both inputs, in order, to the underlying alignment.
    ISequence[] inputSequences = { firstSequence, secondSequence };
    this.seqAlignment = new SequenceAlignment(inputSequences);

    this.alignedSequences = new List<PairwiseAlignedSequence>();

    // false is already the default value; it is assigned explicitly for clarity.
    this.IsReadOnly = false;
}
/// <summary>
/// Parses a single biological sequence alignment text from a stream.
/// The first line must start with "CLUSTAL". Each following data line holds a sequence
/// ID, a sequence segment and optionally a trailing number, which is ignored. Blank lines
/// and lines made up only of the characters '*', ' ', '.', '+' and ':' end a block.
/// Segments with the same ID are concatenated across blocks.
/// </summary>
/// <param name="reader">Reader positioned at the alignment text.</param>
/// <returns>A sequence alignment holding one aligned sequence with every parsed sequence.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when there is no text to parse, the header does not start with "CLUSTAL", a data
/// line does not contain both an ID and a sequence segment, the alphabet cannot be detected
/// or differs between sequences, or a later block uses an ID missing from the first block.
/// </exception>
private ISequenceAlignment ParseOne(StreamReader reader)
{
    // no empty files allowed
    if (line == null)
    {
        ReadNextLine(reader);
    }

    if (line == null)
    {
        throw new InvalidDataException(Properties.Resource.IONoTextToParse);
    }

    if (!line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
    }

    ReadNextLine(reader); // Skip blank lines until we get to the first block.

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    skipBlankLines = false;

    var mapIdToSequence = new Dictionary<string, Tuple<ISequence, List<byte>>>();
    IAlphabet alignmentAlphabet = null;
    bool isFirstBlock = true;
    bool inBlock = false;
    var endOfBlockSymbols = new HashSet<char> { '*', ' ', '.', '+', ':' };

    while (reader.Peek() != -1)
    {
        // Blank line or consensus line signals end of block.
        if (String.IsNullOrEmpty(line) || line.ToCharArray().All(endOfBlockSymbols.Contains))
        {
            if (inBlock)
            {
                // Blank line signifies end of block
                inBlock = false;
                isFirstBlock = false;
            }
        }
        else // It's not a blank or consensus line.
        {
            // It's a data line in a block.
            // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore
            string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); // (char[])null uses whitespace delimiters

            // A data line needs both an ID and a segment; report a malformed line as
            // invalid input instead of failing with an index-out-of-range error.
            if (tokens.Length < 2)
            {
                throw new InvalidDataException(
                    string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            string id = tokens[0];
            string data = tokens[1].ToUpperInvariant();
            byte[] byteData = Encoding.UTF8.GetBytes(data);
            Tuple<ISequence, List<byte>> sequenceTuple;
            IAlphabet alphabet = Alphabet;

            inBlock = true;
            if (isFirstBlock)
            {
                if (null == alphabet)
                {
                    // No alphabet was configured: detect it from the data and require
                    // that every sequence resolves to the same alphabet.
                    alphabet = Alphabets.AutoDetectAlphabet(byteData, 0, byteData.Length, alphabet);

                    if (null == alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.InvariantCulture,
                            Properties.Resource.InvalidSymbolInString,
                            data));
                    }

                    if (null == alignmentAlphabet)
                    {
                        alignmentAlphabet = alphabet;
                    }
                    else
                    {
                        if (alignmentAlphabet != alphabet)
                        {
                            throw new InvalidDataException(string.Format(
                                CultureInfo.CurrentCulture,
                                Properties.Resource.SequenceAlphabetMismatch));
                        }
                    }
                }

                // Item1 carries the alphabet and ID; Item2 accumulates the bytes across blocks.
                sequenceTuple = new Tuple<ISequence, List<byte>>(
                    new Sequence(alphabet, "") { ID = id },
                    new List<byte>());
                sequenceTuple.Item2.AddRange(byteData);

                mapIdToSequence.Add(id, sequenceTuple);
            }
            else
            {
                // Later blocks may only extend sequences introduced in the first block.
                if (!mapIdToSequence.TryGetValue(id, out sequenceTuple))
                {
                    throw new InvalidDataException(string.Format(
                        CultureInfo.CurrentCulture,
                        Properties.Resource.ClustalUnknownSequence,
                        id));
                }

                sequenceTuple.Item2.AddRange(byteData);
            }
        }

        ReadNextLine(reader);
    }

    var sequenceAlignment = new SequenceAlignment();
    var alignedSequence = new AlignedSequence();
    sequenceAlignment.AlignedSequences.Add(alignedSequence);

    // Materialize each accumulated byte list as a sequence with its alphabet and ID.
    foreach (var alignmentSequenceTuple in mapIdToSequence.Values)
    {
        alignedSequence.Sequences.Add(
            new Sequence(alignmentSequenceTuple.Item1.Alphabet, alignmentSequenceTuple.Item2.ToArray())
            {
                ID = alignmentSequenceTuple.Item1.ID
            });
    }

    return sequenceAlignment;
}
/// <summary>
/// Aligns two protein sequences with the Needleman-Wunsch aligner, copies the aligned
/// sequences of the first result into a new SequenceAlignment, and checks that its
/// ToString() output equals the expected three-line text.
/// </summary>
public void TestSequenceAlignmentToString()
{
    ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
    IAlphabet alphabet = Alphabets.Protein;

    // Create input sequences
    var inputSequences = new List<ISequence>();
    inputSequences.Add(new Sequence(alphabet, "KRIPKSQNLRSIHSIFPFLEDKLSHLN"));
    inputSequences.Add(new Sequence(alphabet, "LNIPSLITLNKSIYVFSKRKKRLSGFLHN"));

    // Get aligned sequences
    IList<ISequenceAlignment> alignments = aligner.Align(inputSequences);
    ISequenceAlignment source = alignments[0];

    // Copy the aligned sequences of the first result into a new alignment
    ISequenceAlignment alignment = new SequenceAlignment();
    for (int i = 0; i < source.AlignedSequences.Count; i++)
    {
        alignment.AlignedSequences.Add(source.AlignedSequences[i]);
    }

    const string expected =
        "XXIPXXXXLXXXXXXFXXXXXXLSXXLHN\r\n" +
        "KRIPKSQNLRSIHSIFPFLEDKLSHL--N\r\n" +
        "LNIPSLITLNKSIYVFSKRKKRLSGFLHN\r\n\r\n";

    // The expected text is written with "\r\n"; convert it to the platform line break.
    string expectedForPlatform = expected.Replace("\r\n", Environment.NewLine);
    string actual = alignment.ToString();
    Assert.AreEqual(expectedForPlatform, actual);
}
/// <summary>
/// Validates a SequenceAlignment instance built from the output of the given aligner.
/// The alphabet and the two input sequences are read from the XML node, the first
/// alignment result is copied (aligned sequences and metadata) into a new
/// SequenceAlignment, and the copy is compared with the source.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="aligner">sw/nw/pw aligners</param>
private void ValidateSequenceAlignmentCtor(string nodeName, ISequenceAligner aligner)
{
    string alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    IAlphabet alphabet = Utility.GetAlphabet(alphabetName);
    string firstText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string secondText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

    // Create input sequences
    var inputSequences = new List<ISequence>
    {
        new Sequence(alphabet, firstText),
        new Sequence(alphabet, secondText)
    };

    // Get aligned sequences
    IList<ISequenceAlignment> alignments = aligner.Align(inputSequences);
    ISequenceAlignment source = alignments[0];

    // Copy aligned sequences and metadata of the first result into a new alignment
    ISequenceAlignment alignment = new SequenceAlignment();
    foreach (var alignedSequence in source.AlignedSequences)
    {
        alignment.AlignedSequences.Add(alignedSequence);
    }

    foreach (string key in source.Metadata.Keys)
    {
        alignment.Metadata.Add(key, source.Metadata[key]);
    }

    // Validate the properties: first sequence of every aligned sequence, then the metadata
    for (int position = 0; position < source.AlignedSequences.Count; position++)
    {
        string expectedSequence = source.AlignedSequences[position].Sequences[0].ToString();
        string actualSequence = alignment.AlignedSequences[position].Sequences[0].ToString();
        Assert.AreEqual(expectedSequence, actualSequence);
    }

    foreach (string key in source.Metadata.Keys)
    {
        Assert.AreEqual(source.Metadata[key], alignment.Metadata[key]);
    }

    ApplicationLog.WriteLine(@"Alignment BVT : Validation of sequence alignment ctor completed successfully");
}