Пример #1
0
        /// <summary>
        /// Creates a protein sequence by translating a nucleotide sequence with that
        /// sequence's genetic code.
        /// </summary>
        /// <param name="seq">The nucleotide sequence to translate.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="seq"/> uses a strict DNA or RNA alphabet and its length is not a
        /// multiple of three.
        /// </exception>
        internal ProteinSequence(NucleotideSequence seq)
        {
            var usesStrictAlphabet = seq.ActiveAlphabet == AlphabetType.StrictDna ||
                                     seq.ActiveAlphabet == AlphabetType.StrictRna;
            if (usesStrictAlphabet && seq.Sequence.Length % 3 != 0)
            {
                throw new ArgumentException("Sequence length is not evenly divisible by three, which means it cannot be translated because you are using a strict nucleotide alphabet");
            }

            // Ambiguous nucleotide alphabets map to the extended protein alphabet;
            // every other source alphabet maps to the standard protein alphabet.
            AlphabetType alphabet;
            switch (seq.ActiveAlphabet)
            {
                case AlphabetType.AmbiguousDna:
                case AlphabetType.AmbiguousRna:
                    alphabet = AlphabetType.ExtendedProtein;
                    break;

                default:
                    alphabet = AlphabetType.StandardProtein;
                    break;
            }

            // The lookup table is keyed on the *nucleotide* alphabet of the source sequence.
            var translationTable = new Dictionary<string, AminoAcid>(
                AlphabetDataProvider.GetTranslationTable(seq.GeneticCode, seq.ActiveAlphabet));

            // The protein's own alphabet is the protein alphabet chosen above, not the
            // nucleotide alphabet of the source; this matches the public string constructor.
            ActiveAlphabet   = alphabet;
            _proteinAlphabet = new ProteinAlphabet(alphabet, seq.GeneticCode);

            // NOTE(review): the previous code built a local symbol set (Stop, Gap plus the
            // alphabet's symbols) but never stored it. The provider call below is the one the
            // public constructor uses; confirm it also covers Stop and Gap.
            AllowedSymbols = AlphabetDataProvider.GetAllowedProteinSymbols(alphabet);

            var proteinBlob = Translate(seq.Sequence, translationTable);

            Sequence     = proteinBlob.Sequence;
            _aminoCounts = proteinBlob.AminoCounts;
        }
Пример #2
0
        /// <summary>
        /// Creates a protein sequence from raw text, validating every character against the
        /// requested protein alphabet and counting how often each amino acid occurs.
        /// </summary>
        /// <param name="rawSequence">
        /// The amino-acid characters. Leading and trailing whitespace is trimmed. Characters
        /// are upper-cased (invariant culture) for validation and counting only; the stored
        /// sequence keeps the casing of the trimmed input.
        /// </param>
        /// <param name="desiredProteinAlphabet">
        /// Must be <c>AlphabetType.ExtendedProtein</c> or <c>AlphabetType.StandardProtein</c>.
        /// </param>
        /// <param name="geneticCode">Genetic code passed on to the protein alphabet.</param>
        /// <exception cref="ArgumentNullException"><paramref name="rawSequence"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="desiredProteinAlphabet"/> is not a protein alphabet, or
        /// <paramref name="rawSequence"/> contains a character that is not allowed in it.
        /// </exception>
        public ProteinSequence(string rawSequence, AlphabetType desiredProteinAlphabet, GeneticCode geneticCode = GeneticCode.Standard)
        {
            // Alphabet validation comes first so callers see the same exception as before
            // when both arguments are invalid.
            if (desiredProteinAlphabet != AlphabetType.ExtendedProtein &&
                desiredProteinAlphabet != AlphabetType.StandardProtein)
            {
                throw new ArgumentException(String.Format(ProteinAlphabet.InvalidProteinAlphabet, desiredProteinAlphabet));
            }

            // Fail with a descriptive exception instead of a NullReferenceException from Trim().
            if (rawSequence == null)
            {
                throw new ArgumentNullException("rawSequence");
            }

            _proteinAlphabet = new ProteinAlphabet(desiredProteinAlphabet, geneticCode);
            ActiveAlphabet   = desiredProteinAlphabet;
            AllowedSymbols   = AlphabetDataProvider.GetAllowedProteinSymbols(ActiveAlphabet);

            // Seed a zero count for every allowed symbol so the increment below never
            // hits a missing key.
            _aminoCounts = new Dictionary<AminoAcid, long>(AllowedSymbols.Count);
            foreach (var symbol in AllowedSymbols)
            {
                _aminoCounts.Add(symbol, 0);
            }

            var trimmedRaw = rawSequence.Trim();

            foreach (var aminoCharacter in trimmedRaw)
            {
                // The upper-cased character is cast directly to the AminoAcid enum.
                var typedAmino = (AminoAcid)Char.ToUpperInvariant(aminoCharacter);
                if (!AllowedSymbols.Contains(typedAmino))
                {
                    throw new ArgumentException(String.Format(_invalidAminoAcidCharacter, aminoCharacter, _proteinAlphabet));
                }
                _aminoCounts[typedAmino]++;
            }

            Sequence = trimmedRaw;
        }