/// <summary>
/// Creates a nucleotide alphabet for the given alphabet type and genetic code.
/// </summary>
/// <param name="nucleotideAlphabet">Alphabet type to use; the two protein alphabet types are rejected.</param>
/// <param name="geneticCode">Genetic code stored on the instance and used to look up the translation table.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="nucleotideAlphabet"/> is <c>ExtendedProtein</c> or <c>StandardProtein</c>.
/// </exception>
public NucleotideAlphabet(AlphabetType nucleotideAlphabet, GeneticCode geneticCode)
{
    // Reject protein alphabets up front; every other alphabet type falls through.
    switch (nucleotideAlphabet)
    {
        case AlphabetType.ExtendedProtein:
        case AlphabetType.StandardProtein:
            var message = String.Format(AlphabetDataProvider.InvalidNucleotideAlphabet, nucleotideAlphabet);
            throw new ArgumentException(message);
    }

    GeneticCode = geneticCode;

    // All lookup data comes from the shared data provider, keyed by the alphabet type.
    AllowedSymbols = AlphabetDataProvider.GetAllowedNucleotideSymbols(nucleotideAlphabet);
    ComplementTable = AlphabetDataProvider.GetComplementTable(nucleotideAlphabet);
    TranscriptionTable = AlphabetDataProvider.GetTranscriptionTable(nucleotideAlphabet);
    TranslationTable = AlphabetDataProvider.GetTranslationTable(geneticCode, nucleotideAlphabet);
    GcContentSymbols = AlphabetDataProvider.GcContentSymbols(nucleotideAlphabet);
}
/// <summary>
/// Creates a protein sequence by translating a nucleotide sequence.
/// </summary>
/// <param name="seq">
/// Nucleotide sequence to translate. Its genetic code and active alphabet select the
/// translation table; ambiguous nucleotide alphabets map to the extended protein alphabet,
/// everything else maps to the standard protein alphabet.
/// </param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="seq"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="seq"/> uses a strict nucleotide alphabet and its length is not a multiple of three.
/// </exception>
internal ProteinSequence(NucleotideSequence seq)
{
    if (ReferenceEquals(seq, null))
    {
        throw new ArgumentNullException("seq");
    }

    var usesStrictAlphabet = seq.ActiveAlphabet == AlphabetType.StrictDna
                             || seq.ActiveAlphabet == AlphabetType.StrictRna;
    if (usesStrictAlphabet && seq.Sequence.Length % 3 != 0)
    {
        throw new ArgumentException("Sequence length is not evenly divisible by three, which means it cannot be translated because you are using a strict nucleotide alphabet");
    }

    // Pick the protein alphabet that corresponds to the source nucleotide alphabet.
    AlphabetType alphabet;
    switch (seq.ActiveAlphabet)
    {
        case AlphabetType.AmbiguousDna:
        case AlphabetType.AmbiguousRna:
            alphabet = AlphabetType.ExtendedProtein;
            break;
        default:
            alphabet = AlphabetType.StandardProtein;
            break;
    }

    // The translation table is keyed by the *nucleotide* alphabet of the source sequence.
    var translationTable = new Dictionary<string, AminoAcid>(
        AlphabetDataProvider.GetTranslationTable(seq.GeneticCode, seq.ActiveAlphabet));

    // The resulting sequence is a protein, so it must report the protein alphabet
    // (not the nucleotide alphabet it was translated from), matching the string constructor.
    ActiveAlphabet = alphabet;

    // Populate the allowed symbols the same way the string constructor does.
    // NOTE(review): confirm that the provider's protein symbol set covers every symbol
    // Translate can emit (e.g. stop/gap markers).
    AllowedSymbols = AlphabetDataProvider.GetAllowedProteinSymbols(alphabet);

    _proteinAlphabet = new ProteinAlphabet(alphabet, seq.GeneticCode);

    var proteinBlob = Translate(seq.Sequence, translationTable);
    Sequence = proteinBlob.Sequence;
    _aminoCounts = proteinBlob.AminoCounts;
}
/// <summary>
/// Creates a protein sequence from raw text, validating every character against the
/// symbols allowed by the requested protein alphabet and counting each amino acid.
/// </summary>
/// <param name="rawSequence">Sequence text; leading and trailing whitespace is trimmed before validation.</param>
/// <param name="desiredProteinAlphabet">Must be <c>ExtendedProtein</c> or <c>StandardProtein</c>.</param>
/// <param name="geneticCode">Genetic code passed to the protein alphabet; defaults to <c>GeneticCode.Standard</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="rawSequence"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="desiredProteinAlphabet"/> is not a protein alphabet, or when
/// <paramref name="rawSequence"/> contains a character that is not an allowed symbol.
/// </exception>
public ProteinSequence(string rawSequence, AlphabetType desiredProteinAlphabet, GeneticCode geneticCode = GeneticCode.Standard)
{
    if (desiredProteinAlphabet != AlphabetType.ExtendedProtein
        && desiredProteinAlphabet != AlphabetType.StandardProtein)
    {
        throw new ArgumentException(String.Format(ProteinAlphabet.InvalidProteinAlphabet, desiredProteinAlphabet));
    }

    // Fail with a descriptive argument exception instead of a NullReferenceException from Trim().
    if (rawSequence == null)
    {
        throw new ArgumentNullException("rawSequence");
    }

    _proteinAlphabet = new ProteinAlphabet(desiredProteinAlphabet, geneticCode);
    ActiveAlphabet = desiredProteinAlphabet;
    AllowedSymbols = AlphabetDataProvider.GetAllowedProteinSymbols(ActiveAlphabet);

    // Start every allowed symbol at zero so symbols absent from the input still have an entry.
    _aminoCounts = new Dictionary<AminoAcid, long>(AllowedSymbols.Count);
    foreach (var symbol in AllowedSymbols)
    {
        _aminoCounts.Add(symbol, 0);
    }

    var trimmedRaw = rawSequence.Trim();
    foreach (var aminoCharacter in trimmedRaw)
    {
        // Validation and counting are case-insensitive: characters are upper-cased before lookup.
        var typedAmino = (AminoAcid)Char.ToUpperInvariant(aminoCharacter);
        if (!AllowedSymbols.Contains(typedAmino))
        {
            throw new ArgumentException(String.Format(_invalidAminoAcidCharacter, aminoCharacter, _proteinAlphabet));
        }

        _aminoCounts[typedAmino]++;
    }

    // NOTE(review): the stored text keeps the caller's original casing even though counts are
    // keyed by the upper-cased symbol - confirm whether Sequence should be normalised too.
    Sequence = trimmedRaw;
}
/// <summary>
/// Creates a consensus builder over a set of sequences that all share one length and one
/// alphabet, and allocates a per-symbol count row of that length for every allowed symbol.
/// </summary>
/// <param name="sequences">Sequences to build the consensus from; must contain at least one element.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="sequences"/> is empty, when the sequences differ in length or
/// alphabet, or when the alphabet type is not one of the known nucleotide/protein alphabets.
/// </exception>
public ConsensusBuilder(IEnumerable<ISequence> sequences)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    // Materialise once so the input is enumerated a single time.
    var sequenceList = sequences.ToList();
    if (sequenceList.Count == 0)
    {
        // Report empty input as an argument error, consistent with the other validation
        // failures below, rather than letting First() throw InvalidOperationException.
        throw new ArgumentException("At least one sequence is required to build a consensus", "sequences");
    }

    _sequences = sequenceList;

    // The first sequence defines the length and alphabet every other sequence must match.
    var firstElement = sequenceList[0];
    _sequenceLength = firstElement.Sequence.Length;
    _alphabet = firstElement.ActiveAlphabet;

    foreach (var sequence in sequenceList)
    {
        if (sequence.Sequence.Length != _sequenceLength)
        {
            throw new ArgumentException("Sequences were not all of uniform length");
        }

        if (_alphabet != sequence.ActiveAlphabet)
        {
            throw new ArgumentException("Sequences must all be of the same type");
        }
    }

    // Collect the allowed symbols of the shared alphabet as plain characters.
    char[] alphabetChars;
    switch (_alphabet)
    {
        case AlphabetType.StrictRna:
        case AlphabetType.AmbiguousRna:
        case AlphabetType.StrictDna:
        case AlphabetType.AmbiguousDna:
            alphabetChars = AlphabetDataProvider.GetAllowedNucleotideSymbols(_alphabet)
                .Select(symbol => (char)symbol)
                .ToArray();
            break;
        case AlphabetType.ExtendedProtein:
        case AlphabetType.StandardProtein:
            alphabetChars = AlphabetDataProvider.GetAllowedProteinSymbols(_alphabet)
                .Select(symbol => (char)symbol)
                .ToArray();
            break;
        default:
            throw new ArgumentException("Unknown alphabet type");
    }

    // One count row per symbol, with one slot per sequence position.
    _consensusMatrix = new Dictionary<char, int[]>(alphabetChars.Length);
    foreach (var character in alphabetChars)
    {
        _consensusMatrix.Add(character, new int[_sequenceLength]);
    }
}