/// <summary>
/// Builds a protein sequence by translating an existing nucleotide sequence.
/// </summary>
/// <param name="seq">
/// Nucleotide sequence to translate. Its genetic code and active alphabet select the
/// translation table; its active alphabet also decides which protein alphabet is used
/// (ambiguous DNA/RNA map to the extended protein alphabet, everything else to the standard one).
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="seq"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="seq"/> uses a strict DNA or RNA alphabet and its length is not a multiple of three.
/// </exception>
internal ProteinSequence(NucleotideSequence seq)
{
    if (seq == null)
    {
        throw new ArgumentNullException("seq");
    }

    // Only the strict alphabets require a whole number of codons.
    var usesStrictAlphabet = seq.ActiveAlphabet == AlphabetType.StrictDna
                             || seq.ActiveAlphabet == AlphabetType.StrictRna;
    if (usesStrictAlphabet && seq.Sequence.Length % 3 != 0)
    {
        throw new ArgumentException("Sequence length is not evenly divisible by three, which means it cannot be translated because you are using a strict nucleotide alphabet");
    }

    // Pick the protein alphabet that corresponds to the source nucleotide alphabet.
    AlphabetType alphabet;
    switch (seq.ActiveAlphabet)
    {
        case AlphabetType.AmbiguousDna:
        case AlphabetType.AmbiguousRna:
            alphabet = AlphabetType.ExtendedProtein;
            break;
        default:
            alphabet = AlphabetType.StandardProtein;
            break;
    }

    // The translation table is still keyed on the *nucleotide* alphabet of the source.
    var translationTable = new Dictionary<string, AminoAcid>(
        AlphabetDataProvider.GetTranslationTable(seq.GeneticCode, seq.ActiveAlphabet));

    // Record the protein alphabet (not the source nucleotide alphabet) and its allowed symbols,
    // mirroring what the public string-based constructor does.
    ActiveAlphabet = alphabet;
    AllowedSymbols = AlphabetDataProvider.GetAllowedProteinSymbols(ActiveAlphabet);
    _proteinAlphabet = new ProteinAlphabet(alphabet, seq.GeneticCode);

    var proteinBlob = Translate(seq.Sequence, translationTable);
    Sequence = proteinBlob.Sequence;
    _aminoCounts = proteinBlob.AminoCounts;
}
/// <summary>
/// Builds a protein sequence from raw text, validating every character against the
/// requested protein alphabet and tallying how often each amino acid occurs.
/// </summary>
/// <param name="rawSequence">
/// Amino acid characters. Leading and trailing whitespace is trimmed, and each character is
/// upper-cased (invariant culture) before being checked against the allowed symbols.
/// </param>
/// <param name="desiredProteinAlphabet">
/// Must be <c>AlphabetType.StandardProtein</c> or <c>AlphabetType.ExtendedProtein</c>.
/// </param>
/// <param name="geneticCode">Genetic code handed to the <c>ProteinAlphabet</c>; defaults to the standard code.</param>
/// <exception cref="ArgumentNullException"><paramref name="rawSequence"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="desiredProteinAlphabet"/> is not a protein alphabet, or
/// <paramref name="rawSequence"/> contains a character outside the allowed symbols.
/// </exception>
public ProteinSequence(string rawSequence, AlphabetType desiredProteinAlphabet, GeneticCode geneticCode = GeneticCode.Standard)
{
    // Checked first so an invalid alphabet keeps taking precedence over other argument errors.
    if (desiredProteinAlphabet != AlphabetType.ExtendedProtein
        && desiredProteinAlphabet != AlphabetType.StandardProtein)
    {
        throw new ArgumentException(String.Format(ProteinAlphabet.InvalidProteinAlphabet, desiredProteinAlphabet));
    }

    if (rawSequence == null)
    {
        throw new ArgumentNullException("rawSequence");
    }

    _proteinAlphabet = new ProteinAlphabet(desiredProteinAlphabet, geneticCode);
    ActiveAlphabet = desiredProteinAlphabet;
    AllowedSymbols = AlphabetDataProvider.GetAllowedProteinSymbols(ActiveAlphabet);

    // Seed every allowed symbol with a zero count so the tally below can increment unconditionally.
    _aminoCounts = new Dictionary<AminoAcid, long>(AllowedSymbols.Count);
    foreach (var symbol in AllowedSymbols)
    {
        _aminoCounts.Add(symbol, 0);
    }

    var trimmedRaw = rawSequence.Trim();
    foreach (var aminoCharacter in trimmedRaw)
    {
        var typedAmino = (AminoAcid)Char.ToUpperInvariant(aminoCharacter);
        if (!AllowedSymbols.Contains(typedAmino))
        {
            // Report the character exactly as the caller supplied it.
            throw new ArgumentException(String.Format(_invalidAminoAcidCharacter, aminoCharacter, _proteinAlphabet));
        }

        _aminoCounts[typedAmino]++;
    }

    // NOTE(review): validation and counting use the upper-cased character, but the stored
    // sequence keeps the caller's original casing — confirm downstream consumers expect that.
    Sequence = trimmedRaw;
}