/// <summary>
/// Creates a mapper for the supplied alignment and target database.
/// The mapper starts out flagged as not yet successfully mapped.
/// </summary>
/// <param name="alignment">Alignment stored in <c>MappedAlignment</c>.</param>
/// <param name="connectionString">Value stored in <c>ConnectionString</c>.</param>
/// <param name="databaseName">Value stored in <c>DatabaseName</c>.</param>
public Mapper(SequenceAlignment alignment, string connectionString, string databaseName)
{
    this.DatabaseName = databaseName;
    this.ConnectionString = connectionString;
    this.MappedAlignment = alignment;
    this.MappedSuccessfully = false;
}
/// <summary>
/// Console demo: builds a score matrix over a small alphabet, prints it row by row,
/// aligns two hard-coded symbol arrays and prints both aligned strings.
/// Waits for a line of input before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var alphabet = new List<string> { "H", "A", "P", "L", "E", "-" };
    string[] firstSymbols = { "", "A", "P", "P", "L", "E" };
    string[] secondSymbols = { "", "H", "A", "P", "E" };

    var aligner = new SequenceAlignment(firstSymbols, secondSymbols);
    aligner.ScoreMatrix(alphabet, 1, -1, -1);

    // One output line per alphabet symbol: the symbol, then every "key value" pair of its row.
    Dictionary<string, Dictionary<string, int>> matrix = aligner.scores;
    foreach (string rowSymbol in alphabet)
    {
        Console.Write("{0} ", rowSymbol);
        foreach (KeyValuePair<string, int> cell in matrix[rowSymbol])
        {
            Console.Write("{0} {1} ", cell.Key, cell.Value);
        }

        Console.WriteLine();
    }

    Console.WriteLine();

    aligner.AlignScore();
    Dictionary<string, string> aligned = aligner.AlignSequence();
    Console.WriteLine(aligned["X"]);
    Console.WriteLine(aligned["Y"]);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Aligns two protein sequences with Needleman-Wunsch, copies the first resulting alignment
/// (aligned sequences and metadata) into a fresh <c>SequenceAlignment</c>, and checks that its
/// <c>ToString()</c> output matches the expected text read from the XML test configuration.
/// Line breaks are stripped from both sides before comparing.
/// </summary>
public void ValidateSequenceAlignmentToString()
{
    ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
    IAlphabet proteinAlphabet = Alphabets.Protein;
    const string firstText = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
    const string secondText = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

    // Create input sequences
    var inputSequences = new List<ISequence>
    {
        new Sequence(proteinAlphabet, firstText),
        new Sequence(proteinAlphabet, secondText)
    };

    // Get aligned sequences; only the first alignment is used
    ISequenceAlignment source = aligner.Align(inputSequences)[0];

    ISequenceAlignment copy = new SequenceAlignment();
    foreach (var alignedSequence in source.AlignedSequences)
    {
        copy.AlignedSequences.Add(alignedSequence);
    }

    foreach (string key in source.Metadata.Keys)
    {
        copy.Metadata.Add(key, source.Metadata[key]);
    }

    string actualText = copy.ToString();
    string expectedText = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.SequenceAlignmentExpectedNode);

    // The expected text has the four-character sequence \r\n removed (backslashes included);
    // the actual text has the platform newline removed.
    Assert.AreEqual(
        expectedText.Replace("\\r\\n", ""),
        actualText.Replace(System.Environment.NewLine, ""));
}
/// <summary>
/// Aligns two protein sequences with Needleman-Wunsch, copies the aligned sequences of the first
/// result into a new <c>SequenceAlignment</c>, and verifies the exact <c>ToString()</c> output
/// (consensus line, both aligned sequences, trailing blank line) using the platform newline.
/// </summary>
public void TestSequenceAlignmentToString()
{
    ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
    IAlphabet proteinAlphabet = Alphabets.Protein;

    // Create input sequences
    var inputSequences = new List<ISequence>();
    inputSequences.Add(new Sequence(proteinAlphabet, "KRIPKSQNLRSIHSIFPFLEDKLSHLN"));
    inputSequences.Add(new Sequence(proteinAlphabet, "LNIPSLITLNKSIYVFSKRKKRLSGFLHN"));

    // Get aligned sequences
    IList<ISequenceAlignment> alignments = aligner.Align(inputSequences);

    ISequenceAlignment copy = new SequenceAlignment();
    for (int i = 0; i < alignments[0].AlignedSequences.Count; i++)
    {
        copy.AlignedSequences.Add(alignments[0].AlignedSequences[i]);
    }

    // Three text lines followed by one empty line, each terminated by the platform newline.
    string expected =
        "XXIPXXXXLXXXXXXFXXXXXXLSXXLHN" + Environment.NewLine +
        "KRIPKSQNLRSIHSIFPFLEDKLSHL--N" + Environment.NewLine +
        "LNIPSLITLNKSIYVFSKRKKRLSGFLHN" + Environment.NewLine +
        Environment.NewLine;

    Assert.AreEqual(expected, copy.ToString());
}
/// <summary>
/// Aligns two protein sequences with Needleman-Wunsch, copies the first resulting alignment
/// (aligned sequences and metadata) into a new <c>SequenceAlignment</c>, and verifies the
/// <c>ToString()</c> output against the expected three-line rendering.
/// </summary>
public void TestSequenceAlignmentToString()
{
    ISequenceAligner aligner = SequenceAligners.NeedlemanWunsch;
    IAlphabet alphabet = Alphabets.Protein;
    string origSequence1 = "KRIPKSQNLRSIHSIFPFLEDKLSHLN";
    string origSequence2 = "LNIPSLITLNKSIYVFSKRKKRLSGFLHN";

    // Create input sequences
    List<ISequence> inputSequences = new List<ISequence>();
    inputSequences.Add(new Sequence(alphabet, origSequence1));
    inputSequences.Add(new Sequence(alphabet, origSequence2));

    // Get aligned sequences
    IList<ISequenceAlignment> alignments = aligner.Align(inputSequences);

    // Copy the first alignment into a fresh instance so ToString() is exercised on a
    // manually populated object.
    ISequenceAlignment alignment = new SequenceAlignment();
    for (int ialigned = 0; ialigned < alignments[0].AlignedSequences.Count; ialigned++)
    {
        alignment.AlignedSequences.Add(alignments[0].AlignedSequences[ialigned]);
    }

    foreach (string key in alignments[0].Metadata.Keys)
    {
        alignment.Metadata.Add(key, alignments[0].Metadata[key]);
    }

    string actualSequenceAlignmentString = alignment.ToString();

    // The expected text is written with "\r\n" terminators; convert them to the platform
    // newline so the comparison does not depend on the operating system the test runs on.
    // NOTE(review): assumes SequenceAlignment.ToString() emits Environment.NewLine, as the
    // sibling ToString tests do - confirm against the implementation.
    string ExpectedSequenceAlignmentString =
        "XXIPXXXXLXXXXXXFXXXXXXLSGFXXN\r\nKRIPKSQNLRSIHSIFPFLEDKLS--HLN\r\nLNIPSLITLNKSIYVFSKRKKRLSGFLHN\r\n\r\n"
            .Replace("\r\n", System.Environment.NewLine);

    Assert.AreEqual(ExpectedSequenceAlignmentString, actualSequenceAlignmentString);
}
/// <summary>
/// Converts the given reads to SAM aligned sequences, wraps them in a single
/// <c>SequenceAlignment</c> with a minimal SAM header, and writes the result to
/// <paramref name="targetStream"/> using a SAM or BAM formatter depending on <c>samOutput</c>.
/// </summary>
/// <param name="reads">Reads to write; <c>null</c> is treated as an empty sequence.</param>
/// <param name="targetStream">Stream handed to the formatter.</param>
public void Write(IEnumerable<Read> reads, Stream targetStream)
{
    reads = reads ?? new Read[] {};

    IFormatter<ISequenceAlignment> formatter;
    if (samOutput)
    {
        formatter = new SAMFormatter();
    }
    else
    {
        formatter = new BAMFormatter();
    }

    var converted = reads.Select(SamReadsConverter.Convert).ToList();

    var alignment = new SequenceAlignment();
    for (int i = 0; i < converted.Count; i++)
    {
        alignment.AlignedSequences.Add(converted[i]);
    }

    // The header names a single reference: the RName of the first read, or "1" when there are no reads.
    var referenceName = converted.Count != 0 ? converted[0].RName : "1";

    alignment.Metadata.Add(
        "SAMAlignmentHeader",
        new SAMAlignmentHeader
        {
            ReferenceSequences = { new ReferenceSequenceInfo { Name = referenceName } }
        });

    formatter.Format(targetStream, alignment);
}
// Closes an enclosing scope whose opening brace lies outside this block.
}
/// <summary>
/// Creates the view model for an alignment: stores the alignment, creates the (initially empty)
/// sequence collection, wires up the map-to-rCAD and load-to-rCAD commands, then calls
/// <c>Initialize()</c>.
/// </summary>
/// <param name="alignment">The alignment this view model wraps.</param>
public AlignmentViewModel(SequenceAlignment alignment)
{
    _alignment = alignment;
    _sequences = new MTObservableCollection<SequenceViewModel>();

    // Mapping can run only when no mapping is in progress, none has completed yet,
    // and a connection was supplied.
    MapToRCADCommand = new DelegatingCommand<rCADConnection>(
        MapAlignmentToRCAD,
        connection => !(MappingToRCAD || IsMappedToRCAD) && connection != null);

    // Loading can run only after a finished mapping that produced a mapping object, and only
    // while no load is running, has failed, or has already completed.
    LoadToRCADCommand = new DelegatingCommand<string>(
        LoadAlignmentToRCAD,
        unused => !MappingToRCAD
                  && IsMappedToRCAD
                  && _alignmentToRCADMapping != null
                  && !(LoadingToRCADFailed || LoadingToRCAD || IsLoadedToRCAD));

    Initialize();
}
/// <summary>
/// Calls <c>GetObjectData</c> on a default-constructed <c>SequenceAlignment</c> and checks that
/// exactly four members were written to the serialization info.
/// </summary>
public void ValidateSequenceAlignmentGetObjectData()
{
    const int expectedMemberCount = 4;

    // NOTE(review): the info is created for typeof(Sequence) although a SequenceAlignment is
    // serialized; this does not affect the member count checked below - confirm it is intended.
    var serializationInfo = new SerializationInfo(typeof(Sequence), new FormatterConverter());
    var streamingContext = new StreamingContext(StreamingContextStates.All);

    var alignmentUnderTest = new SequenceAlignment();
    alignmentUnderTest.GetObjectData(serializationInfo, streamingContext);

    Assert.AreEqual(expectedMemberCount, serializationInfo.MemberCount);
}
/// <summary>
/// Completion handler for the alignment-loading background worker. Clears the loading flag,
/// asks WPF to re-query command states, and either publishes the loaded alignment through
/// <c>Alignment</c> or reports a failure through <c>LoadingAlignmentStatusMessage</c>.
/// </summary>
/// <param name="sender">The worker that raised the event (not used).</param>
/// <param name="e">Completion data; <c>Result</c> is expected to hold a <c>SequenceAlignment</c>.</param>
private void LoadAlignmentWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    // RunWorkerCompletedEventArgs.Result throws when the worker faulted or was cancelled,
    // so it must only be read after checking Error and Cancelled. A missing result, or one
    // of an unexpected type, is treated as a load failure as well.
    SequenceAlignment aln = null;
    if (e.Error == null && !e.Cancelled)
    {
        aln = e.Result as SequenceAlignment;
    }

    _loadingAlignment = false;
    CommandManager.InvalidateRequerySuggested();

    if (aln == null)
    {
        LoadingAlignmentStatusMessage = LOADING_ALIGNMENT_FAILED_MESSAGE;
        return;
    }

    LoadingAlignmentStatusMessage = FINISHED_LOADING_ALIGNMENT_MESSAGE;
    Alignment = new AlignmentViewModel(aln);
}
/// <summary>
/// Aligns the two sequences configured under <paramref name="nodeName"/> with the given aligner,
/// copies the first alignment into a default-constructed <c>SequenceAlignment</c>, and verifies
/// that the copy exposes the same aligned sequences and metadata.
/// </summary>
/// <param name="nodeName">XML node holding the alphabet name and the two input sequences.</param>
/// <param name="aligner">Aligner under test (sw/nw/pw).</param>
private void ValidateSequenceAlignmentCtor(string nodeName, ISequenceAligner aligner)
{
    string alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    IAlphabet alphabet = Utility.GetAlphabet(alphabetName);
    string firstText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string secondText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

    // Create input sequences
    var inputSequences = new List<ISequence>
    {
        new Sequence(alphabet, firstText),
        new Sequence(alphabet, secondText)
    };

    // Get aligned sequences; only the first alignment is inspected
    ISequenceAlignment source = aligner.Align(inputSequences)[0];

    ISequenceAlignment copy = new SequenceAlignment();
    foreach (var alignedSequence in source.AlignedSequences)
    {
        copy.AlignedSequences.Add(alignedSequence);
    }

    foreach (string key in source.Metadata.Keys)
    {
        copy.Metadata.Add(key, source.Metadata[key]);
    }

    // Validate the properties: the first sequence of every aligned entry must render identically.
    for (int i = 0; i < source.AlignedSequences.Count; i++)
    {
        string expectedText = source.AlignedSequences[i].Sequences[0].ToString();
        string actualText = copy.AlignedSequences[i].Sequences[0].ToString();
        Assert.AreEqual(expectedText, actualText);
    }

    foreach (string key in source.Metadata.Keys)
    {
        Assert.AreEqual(source.Metadata[key], copy.Metadata[key]);
    }

    ApplicationLog.WriteLine(@"Alignment BVT : Validation of sequence alignment ctor completed successfully");
}
/// <summary>
/// Background-worker body that loads an alignment file. Does nothing (leaving
/// <c>e.Result</c> unset) unless the argument is a <c>SequenceAlignmentLoaderArgs</c> with a
/// non-null file and the CRW alignment type.
/// </summary>
/// <param name="sender">The worker that raised the event (not used).</param>
/// <param name="e">Carries the loader arguments in and the loaded alignment out.</param>
private void LoadAlignmentWorker(object sender, DoWorkEventArgs e)
{
    // A null argument and an argument of the wrong type both yield null here.
    var loaderArgs = e.Argument as SequenceAlignmentLoaderArgs;
    if (loaderArgs == null || loaderArgs.AlignmentFile == null)
    {
        return;
    }

    if (loaderArgs.AlignmentType != AlignmentType.CRW)
    {
        return;
    }

    ISequenceAlignmentLoader crwLoader = new CRWSequenceAlignmentLoader();
    SequenceAlignment loaded = crwLoader.Load(loaderArgs.AlignmentFile);
    e.Result = loaded;
}
/// <summary>
/// Parses a single Nexus text from a reader into a sequence alignment.
/// The header is parsed first; then, if the current line starts with "begin", blocks are
/// processed one after another until an "END" block name is seen or the reader runs out of lines.
/// </summary>
/// <param name="bioReader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
/// If this flag is set to true then the resulting sequence's isReadOnly property
/// will be set to true, otherwise it will be set to false.</param>
/// <returns>
/// A new sequence alignment holding one aligned-sequence entry; that entry receives one
/// sequence per id found in the CHARACTERS block.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when no alphabet can be identified for a sequence, or when the identified alphabets
/// of the sequences differ.
/// </exception>
protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
{
    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }
    ParseHeader(bioReader);
    string message = string.Empty;
    // The result always contains exactly one AlignedSequence, even if no block is parsed.
    ISequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(new AlignedSequence());
    IList <string> ids = null;
    bool isInBlock = true;
    if (bioReader.Line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
    {
        while (bioReader.HasLines && isInBlock)
        {
            // Blank lines between blocks are skipped.
            if (string.IsNullOrEmpty(bioReader.Line.Trim()))
            {
                bioReader.GoToNextLine();
                continue;
            }
            // The block name is the second token of the current line.
            // NOTE(review): a line with a single token would fail on this index unless
            // GetTokens guarantees at least two entries - confirm.
            string blockName = GetTokens(bioReader.Line)[1];
            switch (blockName.ToUpper(CultureInfo.InvariantCulture))
            {
            case "TAXA":
            case "TAXA;":
                // This block contains the count of sequences and the title of each sequence
                ids = (IList <string>)ParseTaxaBlock(bioReader);
                break;
            case "CHARACTERS":
            case "CHARACTERS;":
                // Block contains the sequence data, looked up below by the ids from the TAXA block.
                // NOTE(review): ids is still null here if no TAXA block was parsed first, which
                // would make the foreach below throw - confirm the format guarantees the order.
                Dictionary <string, string> dataSet = ParseCharacterBlock(bioReader, ids);
                IAlphabet alignmentAlphabet = null;
                string data = string.Empty;
                foreach (string ID in ids)
                {
                    IAlphabet alphabet = Alphabet;
                    Sequence sequence = null;
                    data = dataSet[ID];
                    // Only when no alphabet is configured on the parser is one identified from the
                    // data; in that case every sequence must resolve to the same alphabet.
                    if (null == alphabet)
                    {
                        alphabet = _basicParser.IdentifyAlphabet(alphabet, data);
                        if (null == alphabet)
                        {
                            message = string.Format( CultureInfo.InvariantCulture, Resource.InvalidSymbolInString, data);
                            throw new InvalidDataException(message);
                        }
                        else
                        {
                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else
                            {
                                if (alignmentAlphabet != alphabet)
                                {
                                    message = string.Format( CultureInfo.InvariantCulture, Properties.Resource.SequenceAlphabetMismatch);
                                    throw new InvalidDataException(message);
                                }
                            }
                        }
                    }
                    // Use the parser's Encoding when one is set.
                    if (Encoding == null)
                    {
                        sequence = new Sequence(alphabet, data);
                    }
                    else
                    {
                        sequence = new Sequence(alphabet, Encoding, data);
                    }
                    sequence.IsReadOnly = isReadOnly;
                    sequence.ID = ID;
                    sequenceAlignment.AlignedSequences[0].Sequences.Add(sequence);
                }
                break;
            case "END":
            case "END;":
                // Have reached the end of block
                isInBlock = false;
                break;
            default:
                // Unknown block: skip lines until one equals "end;" (ignoring case) or the
                // reader has no more lines.
                while (bioReader.HasLines)
                {
                    bioReader.GoToNextLine();
                    if (0 == string.Compare(bioReader.Line, "end;", StringComparison.OrdinalIgnoreCase))
                    {
                        break;
                    }
                }
                break;
            }
            bioReader.GoToNextLine();
        }
    }
    return(sequenceAlignment);
}
/// <summary>
/// Parses a single Clustal sequence alignment from a stream reader.
/// The first line must start with "CLUSTAL". Data lines consist of a sequence id followed by a
/// sequence segment; segments with the same id from successive blocks are concatenated.
/// </summary>
/// <param name="reader">Reader positioned at the start of the alignment text.</param>
/// <returns>
/// A sequence alignment with a single aligned-sequence entry containing one sequence per id
/// seen in the first block.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when there is no text, the header is not a CLUSTAL header, a data line does not
/// contain both an id and a sequence segment, no alphabet can be detected, detected alphabets
/// differ between sequences, or a later block references an id missing from the first block.
/// </exception>
private ISequenceAlignment ParseOne(StreamReader reader)
{
    // no empty files allowed
    if (line == null)
    {
        ReadNextLine(reader);
    }

    if (line == null)
    {
        throw new InvalidDataException(Properties.Resource.IONoTextToParse);
    }

    if (!line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
    }

    // Skip blank lines until we get to the first block.
    ReadNextLine(reader);

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    skipBlankLines = false;

    // Per id: a placeholder sequence carrying alphabet and ID, plus the bytes collected so far.
    var mapIdToSequence = new Dictionary<string, Tuple<ISequence, List<byte>>>();
    IAlphabet alignmentAlphabet = null;
    bool isFirstBlock = true;
    bool inBlock = false;
    var endOfBlockSymbols = new HashSet<char> { '*', ' ', '.', '+', ':' };

    // NOTE(review): the loop tests the reader rather than `line`, so a line already read into
    // `line` when the reader reaches end-of-stream is not processed - confirm this is intended.
    while (reader.Peek() != -1)
    {
        // Blank line or consensus line signals end of block.
        if (String.IsNullOrEmpty(line) || line.ToCharArray().All(endOfBlockSymbols.Contains))
        {
            if (inBlock)
            {
                inBlock = false;
                isFirstBlock = false;
            }
        }
        else
        {
            // It's a data line in a block.
            // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore.
            // (char[])null uses whitespace delimiters.
            string[] tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            // A data line needs both an id and a segment; report malformed lines as invalid
            // input instead of failing on the array index.
            if (tokens.Length < 2)
            {
                throw new InvalidDataException(
                    string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            string id = tokens[0];
            string data = tokens[1].ToUpperInvariant();
            byte[] byteData = Encoding.UTF8.GetBytes(data);
            Tuple<ISequence, List<byte>> sequenceTuple;
            IAlphabet alphabet = Alphabet;
            inBlock = true;

            if (isFirstBlock)
            {
                // Without a configured alphabet, detect one from the data; all sequences of the
                // alignment must then resolve to the same alphabet.
                if (null == alphabet)
                {
                    alphabet = Alphabets.AutoDetectAlphabet(byteData, 0, byteData.Length, alphabet);
                    if (null == alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.InvariantCulture,
                            Properties.Resource.InvalidSymbolInString,
                            data));
                    }

                    if (null == alignmentAlphabet)
                    {
                        alignmentAlphabet = alphabet;
                    }
                    else if (alignmentAlphabet != alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.CurrentCulture,
                            Properties.Resource.SequenceAlphabetMismatch));
                    }
                }

                sequenceTuple = new Tuple<ISequence, List<byte>>(
                    new Sequence(alphabet, "") { ID = id },
                    new List<byte>());
                sequenceTuple.Item2.AddRange(byteData);
                mapIdToSequence.Add(id, sequenceTuple);
            }
            else
            {
                // Later blocks may only extend sequences introduced in the first block.
                if (!mapIdToSequence.TryGetValue(id, out sequenceTuple))
                {
                    throw new InvalidDataException(string.Format(
                        CultureInfo.CurrentCulture,
                        Properties.Resource.ClustalUnknownSequence,
                        id));
                }

                sequenceTuple.Item2.AddRange(byteData);
            }
        }

        ReadNextLine(reader);
    }

    // Build the final sequences from the accumulated bytes.
    var sequenceAlignment = new SequenceAlignment();
    var alignedSequence = new AlignedSequence();
    sequenceAlignment.AlignedSequences.Add(alignedSequence);

    foreach (var alignmentSequenceTuple in mapIdToSequence.Values)
    {
        alignedSequence.Sequences.Add(
            new Sequence(alignmentSequenceTuple.Item1.Alphabet, alignmentSequenceTuple.Item2.ToArray())
            {
                ID = alignmentSequenceTuple.Item1.ID
            });
    }

    return sequenceAlignment;
}
/// <summary>
/// Parses a single Phylip sequence alignment from a reader.
/// The first line holds the sequence count and the sequence length. In the first block every
/// line starts with a 10-character id followed by sequence data; lines of later blocks hold
/// only further data for the sequences, in the same order.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>The parsed ISequenceAlignment object.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when there is no text, the header does not have exactly two tokens, a first-block
/// line is 10 characters or shorter, the text ends in the middle of a block, no alphabet can be
/// detected, detected alphabets differ, or the sequence count or a sequence length does not
/// match the header.
/// </exception>
public ISequenceAlignment ParseOne(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    if (line == null)
    {
        ReadNextLine(reader);
    }

    // no empty files allowed
    if (line == null)
    {
        throw new InvalidDataException(Properties.Resource.IONoTextToParse);
    }

    // Parse first line: "<sequence count> <sequence length>"
    IList<string> tokens = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    if (2 != tokens.Count)
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
    }

    int sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
    int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

    bool isFirstBlock = true;
    IAlphabet alignmentAlphabet = null;

    // Per sequence: a placeholder carrying alphabet and ID, plus the bytes collected so far.
    IList<Tuple<Sequence, List<byte>>> data = new List<Tuple<Sequence, List<byte>>>();

    // Skip blank lines until we get to the first block.
    ReadNextLine(reader);

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    skipBlankLines = false;

    while (reader.Peek() != -1)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            ReadNextLine(reader);
            continue;
        }

        // A block consists of exactly sequenceCount consecutive lines.
        for (int index = 0; index < sequenceCount; index++)
        {
            // The text ended in the middle of a block: report it as invalid input rather
            // than dereferencing the missing line.
            if (line == null)
            {
                throw new InvalidDataException(
                    string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
            }

            if (isFirstBlock)
            {
                // First 10 characters are sequence ID, remaining is the first block of sequence.
                // Note that both may contain whitespace, and there may be no whitespace between them.
                if (line.Length <= 10)
                {
                    // InvalidDataException (not bare Exception) to match every other
                    // format error raised by this method.
                    throw new InvalidDataException(
                        string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name));
                }

                string id = line.Substring(0, 10).Trim();
                string sequenceString = line.Substring(10).Replace(" ", "");
                byte[] sequenceBytes = System.Text.ASCIIEncoding.ASCII.GetBytes(sequenceString);

                // Without a configured alphabet, detect one from the data; all sequences of the
                // alignment must then resolve to the same alphabet.
                IAlphabet alphabet = Alphabet;
                if (null == alphabet)
                {
                    alphabet = Alphabets.AutoDetectAlphabet(sequenceBytes, 0, sequenceBytes.Length, alphabet);
                    if (null == alphabet)
                    {
                        throw new InvalidDataException(string.Format(
                            CultureInfo.InvariantCulture,
                            Properties.Resource.InvalidSymbolInString,
                            sequenceString));
                    }

                    if (null == alignmentAlphabet)
                    {
                        alignmentAlphabet = alphabet;
                    }
                    else if (alignmentAlphabet != alphabet)
                    {
                        throw new InvalidDataException(Properties.Resource.SequenceAlphabetMismatch);
                    }
                }

                var sequenceStore = new Tuple<Sequence, List<byte>>(
                    new Sequence(alphabet, string.Empty) { ID = id },
                    new List<byte>());
                sequenceStore.Item2.AddRange(sequenceBytes);
                data.Add(sequenceStore);
            }
            else
            {
                // Later blocks: the whole line (spaces removed) extends the index-th sequence.
                byte[] sequenceBytes = System.Text.ASCIIEncoding.ASCII.GetBytes(line.Replace(" ", ""));
                data[index].Item2.AddRange(sequenceBytes);
            }

            ReadNextLine(reader);
        }

        // Reset the first block flag
        isFirstBlock = false;
    }

    // Validate the number of sequences against the header
    if (sequenceCount != data.Count)
    {
        throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
    }

    SequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

    foreach (var dataSequence in data)
    {
        // Validate the length of each sequence against the header
        if (sequenceLength != dataSequence.Item2.Count)
        {
            throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
        }

        sequenceAlignment.AlignedSequences[0].Sequences.Add(
            new Sequence(dataSequence.Item1.Alphabet, dataSequence.Item2.ToArray())
            {
                ID = dataSequence.Item1.ID
            });
    }

    return sequenceAlignment;
}
/// <summary>
/// Parses a single Phylip text from a reader into a sequence alignment.
/// 1. First line has the count of taxa and the length of each sequence
/// 2. Sequences
///    a. In the first block, the id comes first (the first ten characters when id and data
///       are not separated by whitespace)
///    b. Sequence itself; lines of later blocks are appended to the sequences in order
/// </summary>
/// <param name="bioReader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
/// If this flag is set to true then the resulting sequence's isReadOnly property
/// will be set to true, otherwise it will be set to false.</param>
/// <returns>A new Sequence Alignment instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when the first line does not have exactly two tokens, no alphabet can be identified,
/// identified alphabets differ, or the sequence count or a sequence length does not match the
/// first line.
/// </exception>
protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
{
    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }
    string message = string.Empty;
    // Parse first line
    IList <string> tokens = GetTokens(bioReader.Line);
    if (2 != tokens.Count)
    {
        message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
        throw new InvalidDataException(message);
    }
    bool isFirstBlock = true;
    int sequenceCount = 0;
    int sequenceLength = 0;
    IList <Sequence> data = new List <Sequence>();
    string id = string.Empty;
    string sequenceString = string.Empty;
    Sequence sequence = null;
    IAlphabet alignmentAlphabet = null;
    sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
    sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);
    // Skip blank lines until we get to the first block.
    bioReader.GoToNextLine();
    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    bioReader.SkipBlankLines = false;
    while (bioReader.HasLines)
    {
        // Blank lines between blocks are skipped.
        if (string.IsNullOrEmpty(bioReader.Line.Trim()))
        {
            bioReader.GoToNextLine();
            continue;
        }
        // A block consists of sequenceCount consecutive lines.
        for (int index = 0; index < sequenceCount; index++)
        {
            if (isFirstBlock)
            {
                tokens = GetTokens(bioReader.Line);
                if (1 == tokens.Count)
                {
                    // Id and data are not separated: the first ten characters are the id.
                    // NOTE(review): Substring throws if the single token is shorter than
                    // ten characters - confirm inputs cannot be that short.
                    id = tokens[0].Substring(0, 10);
                    sequenceString = tokens[0].Substring(10);
                }
                else
                {
                    id = tokens[0];
                    sequenceString = tokens[1];
                }
                IAlphabet alphabet = Alphabet;
                // Only when no alphabet is configured on the parser is one identified from the
                // data; in that case every sequence must resolve to the same alphabet.
                if (null == alphabet)
                {
                    alphabet = _basicParser.IdentifyAlphabet(alphabet, sequenceString);
                    if (null == alphabet)
                    {
                        message = string.Format( CultureInfo.InvariantCulture, Resource.InvalidSymbolInString, sequenceString);
                        throw new InvalidDataException(message);
                    }
                    else
                    {
                        if (null == alignmentAlphabet)
                        {
                            alignmentAlphabet = alphabet;
                        }
                        else
                        {
                            if (alignmentAlphabet != alphabet)
                            {
                                message = Properties.Resource.SequenceAlphabetMismatch;
                                throw new InvalidDataException(message);
                            }
                        }
                    }
                }
                if (Encoding == null)
                {
                    sequence = new Sequence(alphabet, sequenceString);
                }
                else
                {
                    sequence = new Sequence(alphabet, Encoding, sequenceString);
                }
                sequence.ID = id;
                // Kept writable while parsing so later blocks can be appended via InsertRange;
                // the requested read-only state is applied once parsing is complete.
                sequence.IsReadOnly = false;
                data.Add(sequence);
            }
            else
            {
                // Later blocks: the trimmed line is appended to the index-th sequence.
                sequence = data[index];
                sequence.InsertRange(sequence.Count, bioReader.Line.Trim());
            }
            bioReader.GoToNextLine();
        }
        // Reset the first block flag
        isFirstBlock = false;
    }
    // Validate the number of sequences against the first line
    if (sequenceCount != data.Count)
    {
        throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
    }
    SequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(new AlignedSequence());
    foreach (Sequence dataSequence in data)
    {
        dataSequence.IsReadOnly = isReadOnly;
        // Validate the length of each sequence against the first line
        if (sequenceLength != dataSequence.Count)
        {
            throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
        }
        sequenceAlignment.AlignedSequences[0].Sequences.Add(dataSequence);
    }
    return(sequenceAlignment);
}
/// <summary>
/// Parses a single ClustalW text from a reader into a sequence alignment.
/// The current line must start with "CLUSTAL". Each data line holds a sequence id followed by
/// a sequence segment; segments with the same id from later blocks are appended to the
/// sequence created in the first block.
/// </summary>
/// <param name="bioReader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
/// If this flag is set to true then the resulting sequence's isReadOnly property
/// will be set to true, otherwise it will be set to false.</param>
/// <returns>A new Sequence Alignment instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when the header is not a CLUSTAL header, no alphabet can be identified, identified
/// alphabets differ, or a later block references an id missing from the first block.
/// </exception>
protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
{
    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }
    string message = string.Empty;
    if (!bioReader.Line.StartsWith("CLUSTAL", StringComparison.OrdinalIgnoreCase))
    {
        message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
        throw new InvalidDataException(message);
    }
    // Skip blank lines until we get to the first block.
    bioReader.GoToNextLine();
    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    bioReader.SkipBlankLines = false;
    Dictionary <string, ISequence> mapIdToSequence = new Dictionary <string, ISequence>();
    IAlphabet alignmentAlphabet = null;
    bool isFirstBlock = true;
    bool inBlock = false;
    while (bioReader.HasLines)
    {
        // Blank line or consensus line signals end of block.
        if (String.IsNullOrEmpty(bioReader.Line) || Helper.ContainsOnly(bioReader.Line, '*', ' ', '.', '+', ':'))
        {
            if (inBlock)
            {
                // First separator after a block: the block is over, and any following
                // block is no longer the first one.
                inBlock = false;
                isFirstBlock = false;
            }
        }
        else
        {
            // It's a data line in a block.
            // Lines begin with sequence id, then the sequence segment, and optionally a number, which we will ignore.
            // (char[])null uses whitespace delimiters.
            // NOTE(review): tokens[1] is read without a length check; a line with a single
            // token would fail on the index - confirm inputs always have id and segment.
            string[] tokens = bioReader.Line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            string id = tokens[0];
            string data = tokens[1].ToUpper(CultureInfo.InvariantCulture);
            Sequence sequence = null;
            IAlphabet alphabet = Alphabet;
            inBlock = true;
            if (isFirstBlock)
            {
                // Only when no alphabet is configured on the parser is one identified from the
                // data; in that case every sequence must resolve to the same alphabet.
                if (null == alphabet)
                {
                    alphabet = _basicParser.IdentifyAlphabet(alphabet, data);
                    if (null == alphabet)
                    {
                        message = string.Format( CultureInfo.InvariantCulture, Resource.InvalidSymbolInString, data);
                        throw new InvalidDataException(message);
                    }
                    else
                    {
                        if (null == alignmentAlphabet)
                        {
                            alignmentAlphabet = alphabet;
                        }
                        else
                        {
                            if (alignmentAlphabet != alphabet)
                            {
                                message = string.Format( CultureInfo.CurrentCulture, Properties.Resource.SequenceAlphabetMismatch);
                                throw new InvalidDataException(message);
                            }
                        }
                    }
                }
                if (Encoding == null)
                {
                    sequence = new Sequence(alphabet, data);
                }
                else
                {
                    sequence = new Sequence(alphabet, Encoding, data);
                }
                sequence.ID = id;
                // Kept writable while parsing so later blocks can be appended via InsertRange;
                // the requested read-only state is applied once parsing is complete.
                sequence.IsReadOnly = false;
                mapIdToSequence.Add(id, sequence);
            }
            else
            {
                // Later blocks may only extend sequences introduced in the first block.
                if (!mapIdToSequence.ContainsKey(id))
                {
                    message = string.Format(CultureInfo.CurrentCulture, Properties.Resource.ClustalUnknownSequence, id);
                    throw new InvalidDataException(message);
                }
                sequence = (Sequence)mapIdToSequence[id];
                sequence.InsertRange(sequence.Count, data);
            }
        }
        bioReader.GoToNextLine();
    }
    // All sequences go into a single aligned-sequence entry of the result.
    SequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(new AlignedSequence());
    foreach (Sequence alignmentSequence in mapIdToSequence.Values)
    {
        alignmentSequence.IsReadOnly = isReadOnly;
        sequenceAlignment.AlignedSequences[0].Sequences.Add(alignmentSequence);
    }
    return(sequenceAlignment);
}
/// <summary>
/// Reads one Nexus alignment from the reader: parses the header, then walks the blocks
/// (TAXA supplies the sequence ids, CHARACTERS supplies the sequence data, END stops the
/// walk, anything else is skipped up to its "end;" line).
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>
/// An alignment holding a single aligned-sequence entry with one sequence per id of the
/// CHARACTERS block.
/// </returns>
ISequenceAlignment ParseOne(TextReader reader)
{
    ReadNextLine(reader);
    if (line == null)
    {
        throw new Exception(Properties.Resource.INVALID_INPUT_FILE);
    }

    ParseHeader(reader);

    var parsedSequences = new AlignedSequence();
    IList<string> taxaIds = null;
    bool keepReadingBlocks = true;

    if (line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
    {
        while (line != null && keepReadingBlocks)
        {
            // Blank lines between blocks carry no information.
            if (line.Trim().Length == 0)
            {
                ReadNextLine(reader);
                continue;
            }

            // The block name is the second token of the current line.
            string blockKey = GetTokens(line)[1].ToUpperInvariant();
            switch (blockKey)
            {
                case "TAXA":
                case "TAXA;":
                    // Count and titles of the sequences
                    taxaIds = ParseTaxaBlock(reader);
                    break;

                case "CHARACTERS":
                case "CHARACTERS;":
                    // Sequence data, keyed by the ids collected from the TAXA block
                    Dictionary<string, string> textById = ParseCharacterBlock(reader, taxaIds);
                    IAlphabet sharedAlphabet = null;

                    foreach (string taxonId in taxaIds)
                    {
                        IAlphabet sequenceAlphabet = Alphabet;
                        string sequenceText = textById[taxonId];

                        // No alphabet configured: detect it, and insist that every
                        // sequence yields the same one.
                        if (null == sequenceAlphabet)
                        {
                            byte[] rawSymbols = sequenceText.ToByteArray();
                            sequenceAlphabet = Alphabets.AutoDetectAlphabet(rawSymbols, 0, rawSymbols.Length, null);

                            if (null == sequenceAlphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture,
                                    Properties.Resource.InvalidSymbolInString,
                                    sequenceText));
                            }

                            if (null == sharedAlphabet)
                            {
                                sharedAlphabet = sequenceAlphabet;
                            }
                            else if (sharedAlphabet != sequenceAlphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture,
                                    Properties.Resource.SequenceAlphabetMismatch));
                            }
                        }

                        var parsed = new Sequence(sequenceAlphabet, sequenceText) { ID = taxonId };
                        parsedSequences.Sequences.Add(parsed);
                    }

                    break;

                case "END":
                case "END;":
                    // Have reached the end of block
                    keepReadingBlocks = false;
                    break;

                default:
                    // Unknown block: advance until a line equal to "end;" (ignoring case)
                    // or until the text is exhausted.
                    while (line != null)
                    {
                        ReadNextLine(reader);
                        if (string.Equals(line, "end;", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }
                    }

                    break;
            }

            ReadNextLine(reader);
        }
    }

    ISequenceAlignment result = new SequenceAlignment();
    result.AlignedSequences.Add(parsedSequences);
    return result;
}
/// <summary>
/// Parses a single Phylip text from a reader into a sequence alignment.
/// 1. First line has the count of taxa and the length of each sequence
/// 2. Sequences
///    a. First ten characters of each first-block line are the ID
///    b. Sequence itself; lines of later blocks are appended to the sequences in order
/// </summary>
/// <param name="mbfReader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
/// If this flag is set to true then the resulting sequence's isReadOnly property
/// will be set to true, otherwise it will be set to false.</param>
/// <returns>A new Sequence Alignment instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="mbfReader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when the first line does not have exactly two tokens, a first-block line is 10
/// characters or shorter, no alphabet can be identified, identified alphabets differ, or the
/// sequence count or a sequence length does not match the first line.
/// </exception>
protected ISequenceAlignment ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
{
    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    string message = string.Empty;

    // Parse first line: "<sequence count> <sequence length>"
    IList<string> tokens = mbfReader.Line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    if (2 != tokens.Count)
    {
        message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
        throw new InvalidDataException(message);
    }

    bool isFirstBlock = true;
    IList<Sequence> data = new List<Sequence>();
    Sequence sequence = null;
    IAlphabet alignmentAlphabet = null;

    int sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
    int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

    // Skip blank lines until we get to the first block.
    mbfReader.GoToNextLine();

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    mbfReader.SkipBlankLines = false;

    while (mbfReader.HasLines)
    {
        if (string.IsNullOrEmpty(mbfReader.Line.Trim()))
        {
            mbfReader.GoToNextLine();
            continue;
        }

        // A block consists of sequenceCount consecutive lines.
        for (int index = 0; index < sequenceCount; index++)
        {
            if (isFirstBlock)
            {
                // First 10 characters are sequence ID, remaining is the first block of sequence.
                // Note that both may contain whitespace, and there may be no whitespace between them.
                if (mbfReader.Line.Length <= 10)
                {
                    // InvalidDataException (not bare Exception) to match every other
                    // format error raised by this method.
                    message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
                    throw new InvalidDataException(message);
                }

                string id = mbfReader.Line.Substring(0, 10).Trim();
                string sequenceString = Util.Helper.StringRemoveWhitespace(mbfReader.Line.Substring(10));

                // Only when no alphabet is configured on the parser is one identified from the
                // data; in that case every sequence must resolve to the same alphabet.
                IAlphabet alphabet = Alphabet;
                if (null == alphabet)
                {
                    alphabet = _basicParser.IdentifyAlphabet(alphabet, sequenceString);
                    if (null == alphabet)
                    {
                        message = string.Format(
                            CultureInfo.InvariantCulture,
                            Resource.InvalidSymbolInString,
                            sequenceString);
                        throw new InvalidDataException(message);
                    }

                    if (null == alignmentAlphabet)
                    {
                        alignmentAlphabet = alphabet;
                    }
                    else if (alignmentAlphabet != alphabet)
                    {
                        message = Properties.Resource.SequenceAlphabetMismatch;
                        throw new InvalidDataException(message);
                    }
                }

                if (Encoding == null)
                {
                    sequence = new Sequence(alphabet, sequenceString);
                }
                else
                {
                    sequence = new Sequence(alphabet, Encoding, sequenceString);
                }

                sequence.ID = id;

                // Kept writable while parsing so later blocks can be appended via InsertRange;
                // the requested read-only state is applied once parsing is complete.
                sequence.IsReadOnly = false;
                data.Add(sequence);
            }
            else
            {
                // Later blocks: the line (whitespace removed) extends the index-th sequence.
                sequence = data[index];
                sequence.InsertRange(sequence.Count, Util.Helper.StringRemoveWhitespace(mbfReader.Line));
            }

            mbfReader.GoToNextLine();
        }

        // Reset the first block flag
        isFirstBlock = false;
    }

    // Validate the number of sequences against the first line
    if (sequenceCount != data.Count)
    {
        throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
    }

    SequenceAlignment sequenceAlignment = new SequenceAlignment();
    sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

    foreach (Sequence dataSequence in data)
    {
        dataSequence.IsReadOnly = isReadOnly;

        // Validate the length of each sequence against the first line
        if (sequenceLength != dataSequence.Count)
        {
            throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
        }

        sequenceAlignment.AlignedSequences[0].Sequences.Add(dataSequence);
    }

    return sequenceAlignment;
}
/// <summary>
/// Reads one Nexus alignment from the reader: parses the header, then walks the blocks
/// (TAXA supplies the sequence ids, CHARACTERS supplies the sequence data, END stops the
/// walk, anything else is skipped up to its "end;" line).
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <returns>
/// The parsed alignment; it always contains one aligned-sequence entry, which receives one
/// sequence per id of the CHARACTERS block.
/// </returns>
public ISequenceAlignment ParseOne(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    ReadNextLine(reader);

    // Nothing to read: report through the trace and fail.
    if (line == null)
    {
        string invalidFileMessage = Properties.Resource.INVALID_INPUT_FILE;
        Trace.Report(invalidFileMessage);
        throw new FileFormatException(invalidFileMessage);
    }

    ParseHeader(reader);

    ISequenceAlignment result = new SequenceAlignment();
    result.AlignedSequences.Add(new AlignedSequence());

    IList<string> taxaIds = null;
    bool keepReadingBlocks = true;

    if (line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
    {
        while (line != null && keepReadingBlocks)
        {
            // Blank lines between blocks carry no information.
            if (line.Trim().Length == 0)
            {
                ReadNextLine(reader);
                continue;
            }

            // The block name is the second token of the current line.
            string blockKey = GetTokens(line)[1].ToUpperInvariant();
            switch (blockKey)
            {
                case "TAXA":
                case "TAXA;":
                    // Count and titles of the sequences
                    taxaIds = (IList<string>)ParseTaxaBlock(reader);
                    break;

                case "CHARACTERS":
                case "CHARACTERS;":
                    // Sequence data, keyed by the ids collected from the TAXA block
                    Dictionary<string, string> textById = ParseCharacterBlock(reader, taxaIds);
                    IAlphabet sharedAlphabet = null;

                    foreach (string taxonId in taxaIds)
                    {
                        IAlphabet sequenceAlphabet = Alphabet;
                        string sequenceText = textById[taxonId];

                        // No alphabet configured: detect it, and insist that every
                        // sequence yields the same one.
                        if (null == sequenceAlphabet)
                        {
                            byte[] rawSymbols = sequenceText.Select(symbol => (byte)symbol).ToArray();
                            sequenceAlphabet = Alphabets.AutoDetectAlphabet(rawSymbols, 0, rawSymbols.Length, null);

                            if (null == sequenceAlphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture,
                                    Properties.Resource.InvalidSymbolInString,
                                    sequenceText));
                            }

                            if (null == sharedAlphabet)
                            {
                                sharedAlphabet = sequenceAlphabet;
                            }
                            else if (sharedAlphabet != sequenceAlphabet)
                            {
                                throw new InvalidDataException(string.Format(
                                    CultureInfo.InvariantCulture,
                                    Properties.Resource.SequenceAlphabetMismatch));
                            }
                        }

                        Sequence parsed = new Sequence(sequenceAlphabet, sequenceText) { ID = taxonId };
                        result.AlignedSequences[0].Sequences.Add(parsed);
                    }

                    break;

                case "END":
                case "END;":
                    // Have reached the end of block
                    keepReadingBlocks = false;
                    break;

                default:
                    // Unknown block: advance until a line equal to "end;" (ignoring case)
                    // or until the text is exhausted.
                    while (line != null)
                    {
                        ReadNextLine(reader);
                        if (string.Equals(line, "end;", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }
                    }

                    break;
            }

            ReadNextLine(reader);
        }
    }

    return result;
}