/// <summary>
/// Aligns four hard-coded DNA sequences with <see cref="NucmerPairwiseAligner"/> and checks
/// the text returned by ToString() on the first aligned sequence of the first alignment.
/// </summary>
public void TestAlignedSequenceToString()
{
    IList<ISequence> seqList = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CAAAAGGGATTGC---TGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGTAGTACAAAGGAGC"),
        new Sequence(Alphabets.DNA, "CAAAAGGGATTGC---"),
        new Sequence(Alphabets.DNA, "TAGTAGTTCTGCTATATACATTTG"),
        new Sequence(Alphabets.DNA, "GTTATCATGCGAACAATTCAACAGACACTGTAGA")
    };

    // Aligner configuration; properties are assigned in the same order as before.
    NucmerPairwiseAligner num = new NucmerPairwiseAligner
    {
        BreakLength = 8,
        FixedSeparation = 0,
        MinimumScore = 0,
        MaximumSeparation = 0,
        SeparationFactor = 0,
        LengthOfMUM = 8
    };

    IList<ISequenceAlignment> alignmentObj = num.Align(seqList);

    AlignedSequence alignedSeqs = (AlignedSequence)alignmentObj[0].AlignedSequences[0];
    string actualString = alignedSeqs.ToString();
    string expectedString = "CAAAAGGGATTGC---\r\nCAAAAGGGATTGC---\r\nCAAAAGGGATTGC---\r\n";

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Aligns one sequence read from the XML test configuration plus three hard-coded DNA sequences
/// with <see cref="NucmerPairwiseAligner"/>, then compares ToString() of the first aligned
/// sequence with the expected text stored in the same configuration node.
/// </summary>
public void ValidateAlignedSequenceToString()
{
    string actualAlignedSeqString = utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName, Constants.AlignedSeqActualNode);

    IList<ISequence> seqList = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, actualAlignedSeqString),
        new Sequence(Alphabets.DNA, "CAAAAGGGATTGC---"),
        new Sequence(Alphabets.DNA, "TAGTAGTTCTGCTATATACATTTG"),
        new Sequence(Alphabets.DNA, "GTTATCATGCGAACAATTCAACAGACACTGTAGA")
    };

    // Aligner configuration; properties are assigned in the same order as before.
    NucmerPairwiseAligner num = new NucmerPairwiseAligner
    {
        BreakLength = 8,
        FixedSeparation = 0,
        MinimumScore = 0,
        MaximumSeparation = 0,
        SeparationFactor = 0,
        LengthOfMUM = 8
    };

    IList<ISequenceAlignment> alignmentObj = num.Align(seqList);

    AlignedSequence alignedSeqs = (AlignedSequence)alignmentObj[0].AlignedSequences[0];
    string actualString = alignedSeqs.ToString();
    string expectedString = utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName, Constants.AlignedSeqExpectedNode);

    // Real CR/LF pairs are stripped from the actual text, while the literal four-character
    // text "\r\n" is stripped from the expected text before comparing.
    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual(expectedString.Replace("\\r\\n", ""), actualString.Replace("\r\n", ""));
}
/// <summary>
/// Runs stages 1, 2 and 3 (see the class description) on the supplied sequences.
/// </summary>
/// <param name="inputSequences">Sequences to align; must contain at least one sequence.</param>
/// <returns>A list holding one alignment that carries both the input and the aligned sequences.</returns>
/// <exception cref="ArgumentException">Thrown when no input sequence is supplied.</exception>
public IList<Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    // Wipe results of any earlier run, since the instance may be reused.
    _alignedSequencesC = null;
    _alignedSequencesB = null;
    _alignedSequencesA = null;
    _alignedSequences = null;

    _alignmentScoreC = float.MinValue;
    _alignmentScoreB = float.MinValue;
    _alignmentScoreA = float.MinValue;
    _alignmentScore = float.MinValue;

    // Materialize the input once.
    List<ISequence> sequences = inputSequences.ToList();
    if (sequences.Count < 1)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // A cost of zero means "not assigned": fall back to the defaults.
    if (GapOpenCost == 0)
    {
        GapOpenCost = -4;
    }

    if (GapExtensionCost == 0)
    {
        GapExtensionCost = -1;
    }

    Performance.Start();

    // Alphabet set-up.
    SetAlphabet(sequences, SimilarityMatrix, true);
    MsaUtils.SetProfileItemSets(_alphabet);

    Performance.Snapshot("Start Aligning");

    // The actual alignment work.
    DoAlignment(sequences);

    // Package the outcome in the same shape the pairwise aligners use.
    var result = new Alignment.SequenceAlignment();
    foreach (ISequence original in sequences)
    {
        result.Sequences.Add(original);
    }

    IAlignedSequence wrapper = new AlignedSequence();
    foreach (var aligned in AlignedSequences)
    {
        wrapper.Sequences.Add(aligned);
    }

    result.AlignedSequences.Add(wrapper);

    var output = new List<Alignment.ISequenceAlignment>();
    output.Add(result);
    return output;
}
/// <summary>
/// Builds a fresh aligned sequence from the output of the given aligner and verifies
/// that its sequences and metadata match the aligner's first aligned sequence.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="aligner">sw/nw/pw aligners</param>
private void ValidateAlignedSequenceCtor(string nodeName, ISequenceAligner aligner)
{
    // Read the test inputs from the XML configuration.
    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string firstText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string secondText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

    var inputSequences = new List<ISequence>
    {
        new Sequence(alphabet, firstText),
        new Sequence(alphabet, secondText)
    };

    // Run the aligner and pick the first aligned sequence of the first alignment.
    IAlignedSequence copy = new AlignedSequence();
    IList<ISequenceAlignment> alignment = aligner.Align(inputSequences);
    var source = alignment[0].AlignedSequences[0];

    // Copy sequences and metadata into the fresh instance.
    foreach (var sequence in source.Sequences)
    {
        copy.Sequences.Add(sequence);
    }

    foreach (string key in source.Metadata.Keys)
    {
        copy.Metadata.Add(key, source.Metadata[key]);
    }

    // The copy must expose the same sequences, in the same order, and the same metadata.
    for (int position = 0; position < source.Sequences.Count; position++)
    {
        string expectedText = source.Sequences[position].ToString();
        string actualText = copy.Sequences[position].ToString();
        Assert.AreEqual(expectedText, actualText);
    }

    foreach (string key in source.Metadata.Keys)
    {
        Assert.AreEqual(source.Metadata[key], copy.Metadata[key]);
    }

    ApplicationLog.WriteLine(@"Alignment BVT : Validation of aligned sequence ctor completed successfully");
}
/// <summary>
/// Parses a single biological sequence alignment text from a stream.
/// </summary>
/// <param name="reader">Reader</param>
/// <returns>
/// An alignment holding one aligned sequence entry whose sequences are the per-id
/// concatenation of the segments found in every block.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when there is no text, the first line does not start with "CLUSTAL", a data line
/// has no sequence segment, a symbol cannot be mapped to an alphabet, the sequences use
/// different alphabets, or a later block refers to an id missing from the first block.
/// </exception>
private ISequenceAlignment ParseOne(StreamReader reader)
{
    // no empty files allowed
    if (line == null)
    {
        ReadNextLine(reader);
    }

    if (line == null)
    {
        throw new InvalidDataException(Properties.Resource.IONoTextToParse);
    }

    if (!line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
    }

    // Check for remaining input BEFORE each read. Testing reader.Peek() after the read
    // (as the loop condition) would stop one line early and silently drop the final line
    // of the stream, e.g. a last data line that is not followed by a consensus line.
    bool hasPendingLine = reader.Peek() != -1;

    ReadNextLine(reader);  // Skip blank lines until we get to the first block.

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    skipBlankLines = false;

    var mapIdToSequence = new Dictionary<string, Tuple<ISequence, List<byte>>>();
    IAlphabet alignmentAlphabet = null;
    bool isFirstBlock = true;
    bool inBlock = false;
    var endOfBlockSymbols = new HashSet<char> { '*', ' ', '.', '+', ':' };

    while (hasPendingLine && line != null)
    {
        // Blank (including whitespace-only) line or consensus line signals end of block.
        if (string.IsNullOrWhiteSpace(line) || line.All(endOfBlockSymbols.Contains))
        {
            if (inBlock)
            {
                // Blank line signifies end of block
                inBlock = false;
                isFirstBlock = false;
            }
        }
        else
        {
            // It's a data line in a block.
            // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore
            string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries); // (char[])null uses whitespace delimiters
            if (tokens.Length < 2)
            {
                // An id without a sequence segment is malformed input; report it as such
                // instead of failing with an index-out-of-range error.
                throw new InvalidDataException(
                    string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            string id = tokens[0];
            string data = tokens[1].ToUpperInvariant();
            byte[] byteData = Encoding.UTF8.GetBytes(data);
            Tuple<ISequence, List<byte>> sequenceTuple;
            IAlphabet alphabet = Alphabet;

            inBlock = true;
            if (isFirstBlock)
            {
                // The first block introduces every sequence id; detect the alphabet unless one was configured.
                if (null == alphabet)
                {
                    alphabet = Alphabets.AutoDetectAlphabet(byteData, 0, byteData.Length, alphabet);
                    if (null == alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.InvariantCulture, Properties.Resource.InvalidSymbolInString, data));
                    }

                    if (null == alignmentAlphabet)
                    {
                        alignmentAlphabet = alphabet;
                    }
                    else if (alignmentAlphabet != alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.CurrentCulture, Properties.Resource.SequenceAlphabetMismatch));
                    }
                }

                // The empty sequence only carries the id and alphabet; the bytes are collected separately.
                sequenceTuple = new Tuple<ISequence, List<byte>>(
                    new Sequence(alphabet, "") { ID = id },
                    new List<byte>());
                sequenceTuple.Item2.AddRange(byteData);
                mapIdToSequence.Add(id, sequenceTuple);
            }
            else
            {
                // Later blocks may only extend sequences introduced by the first block.
                if (!mapIdToSequence.TryGetValue(id, out sequenceTuple))
                {
                    throw new InvalidDataException(string.Format(
                        CultureInfo.CurrentCulture, Properties.Resource.ClustalUnknownSequence, id));
                }

                sequenceTuple.Item2.AddRange(byteData);
            }
        }

        hasPendingLine = reader.Peek() != -1;
        if (hasPendingLine)
        {
            ReadNextLine(reader);
        }
    }

    // Build the final sequences from the accumulated bytes.
    var sequenceAlignment = new SequenceAlignment();
    var alignedSequence = new AlignedSequence();
    sequenceAlignment.AlignedSequences.Add(alignedSequence);
    foreach (var alignmentSequenceTuple in mapIdToSequence.Values)
    {
        alignedSequence.Sequences.Add(
            new Sequence(alignmentSequenceTuple.Item1.Alphabet, alignmentSequenceTuple.Item2.ToArray())
            {
                ID = alignmentSequenceTuple.Item1.ID
            });
    }

    return sequenceAlignment;
}
/// <summary>
/// Runs stages 1, 2 and 3 (see the class description) on the supplied sequences.
/// </summary>
/// <param name="inputSequences">Sequences to align; must contain at least one sequence.</param>
/// <returns>A list holding one alignment that carries both the input and the aligned sequences.</returns>
/// <exception cref="ArgumentException">Thrown when no input sequence is supplied.</exception>
public IList<Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    // Wipe results of any earlier run, since the instance may be reused.
    // Assignment order mirrors the right-to-left evaluation of a chained assignment.
    this.AlignedSequencesC = null;
    this.AlignedSequencesB = null;
    this.AlignedSequencesA = null;
    this.AlignedSequences = null;

    this.AlignmentScoreC = float.MinValue;
    this.AlignmentScoreB = float.MinValue;
    this.AlignmentScoreA = float.MinValue;
    this.AlignmentScore = float.MinValue;

    // Materialize the input once.
    List<ISequence> sequences = inputSequences.ToList();
    if (sequences.Count < 1)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // A cost of zero means "not assigned": fall back to the defaults.
    if (GapOpenCost == 0)
    {
        GapOpenCost = -4;
    }

    if (GapExtensionCost == 0)
    {
        GapExtensionCost = -1;
    }

    StartLog();

    // Alphabet set-up.
    SetAlphabet(sequences, SimilarityMatrix, true);
    MsaUtils.SetProfileItemSets(this.alphabet);

    ReportLog("Start Aligning");

    // The actual alignment work.
    DoAlignment(sequences);

    // Package the outcome in the same shape the pairwise aligners use.
    var result = new Alignment.SequenceAlignment();
    foreach (ISequence original in sequences)
    {
        result.Sequences.Add(original);
    }

    IAlignedSequence wrapper = new AlignedSequence();
    foreach (var aligned in AlignedSequences)
    {
        wrapper.Sequences.Add(aligned);
    }

    result.AlignedSequences.Add(wrapper);

    var output = new List<Alignment.ISequenceAlignment>();
    output.Add(result);
    return output;
}
/// <summary>
/// Parses a single biological sequence alignment text from a reader.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>The parsed ISequenceAlignment object.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the reader yields no text, when a sequence contains a symbol that matches no
/// alphabet, or when the sequences do not all share the same auto-detected alphabet.
/// </exception>
ISequenceAlignment ParseOne(TextReader reader)
{
    ReadNextLine(reader);
    if (line == null)
    {
        // InvalidDataException derives from Exception, so existing catch blocks keep working,
        // while callers can now tell a bad input file apart from unrelated failures.
        throw new InvalidDataException(Properties.Resource.INVALID_INPUT_FILE);
    }

    this.ParseHeader(reader);

    var alignedSequence = new AlignedSequence();
    IList<string> ids = null;
    bool isInBlock = true;

    // The line may be null here if the input ends right after the header; the loop below
    // already treats null as "no more input", so guard the StartsWith call the same way.
    if (this.line != null && this.line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
    {
        while (this.line != null && isInBlock)
        {
            // Blank lines between blocks carry no information.
            if (string.IsNullOrEmpty(this.line.Trim()))
            {
                this.ReadNextLine(reader);
                continue;
            }

            // The second token of the line names the block.
            string blockName = GetTokens(this.line)[1];

            switch (blockName.ToUpperInvariant())
            {
                case "TAXA":
                case "TAXA;":
                    // This block contains the count of sequence & title of each sequence
                    ids = this.ParseTaxaBlock(reader);
                    break;

                case "CHARACTERS":
                case "CHARACTERS;":
                    // Block contains sequences
                    Dictionary<string, string> dataSet = this.ParseCharacterBlock(reader, ids);
                    IAlphabet alignmentAlphabet = null;

                    foreach (string id in ids)
                    {
                        IAlphabet alphabet = this.Alphabet;
                        string data = dataSet[id];

                        // No alphabet configured: detect it and require that all sequences agree.
                        if (null == alphabet)
                        {
                            byte[] dataArray = data.ToByteArray();
                            alphabet = Alphabets.AutoDetectAlphabet(dataArray, 0, dataArray.Length, null);
                            if (null == alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture, Properties.Resource.InvalidSymbolInString, data));
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else if (alignmentAlphabet != alphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture, Properties.Resource.SequenceAlphabetMismatch));
                            }
                        }

                        alignedSequence.Sequences.Add(new Sequence(alphabet, data) { ID = id });
                    }

                    break;

                case "END":
                case "END;":
                    // Have reached the end of block
                    isInBlock = false;
                    break;

                default:
                    // skip this block
                    while (this.line != null)
                    {
                        this.ReadNextLine(reader);
                        if (0 == string.Compare(this.line, "end;", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }
                    }

                    break;
            }

            this.ReadNextLine(reader);
        }
    }

    ISequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(alignedSequence);
    return sequenceAlignment;
}