/// <summary>
        /// Creates a nucleotide alphabet for the given alphabet type and genetic code,
        /// loading the symbol set and lookup tables from <c>AlphabetDataProvider</c>.
        /// </summary>
        /// <param name="nucleotideAlphabet">Alphabet type to load; must not be one of the protein alphabets.</param>
        /// <param name="geneticCode">Genetic code stored on the instance and used to select the translation table.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="nucleotideAlphabet"/> is <c>ExtendedProtein</c> or <c>StandardProtein</c>.
        /// </exception>
        public NucleotideAlphabet(AlphabetType nucleotideAlphabet, GeneticCode geneticCode)
        {
            // Protein alphabets are rejected before any table is looked up.
            switch (nucleotideAlphabet)
            {
            case AlphabetType.ExtendedProtein:
            case AlphabetType.StandardProtein:
                throw new ArgumentException(String.Format(AlphabetDataProvider.InvalidNucleotideAlphabet, nucleotideAlphabet));
            }

            GeneticCode = geneticCode;

            // Everything below is derived from the alphabet type (plus the genetic code for translation).
            AllowedSymbols = AlphabetDataProvider.GetAllowedNucleotideSymbols(nucleotideAlphabet);
            ComplementTable = AlphabetDataProvider.GetComplementTable(nucleotideAlphabet);
            TranscriptionTable = AlphabetDataProvider.GetTranscriptionTable(nucleotideAlphabet);
            TranslationTable = AlphabetDataProvider.GetTranslationTable(geneticCode, nucleotideAlphabet);
            GcContentSymbols = AlphabetDataProvider.GcContentSymbols(nucleotideAlphabet);
        }
        /// <summary>
        /// Builds a protein sequence by translating a nucleotide sequence with the
        /// translation table selected by that sequence's genetic code and alphabet.
        /// </summary>
        /// <param name="seq">Nucleotide sequence to translate.</param>
        /// <exception cref="ArgumentNullException"><paramref name="seq"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The sequence uses a strict DNA or RNA alphabet and its length is not a multiple of three.
        /// </exception>
        internal ProteinSequence(NucleotideSequence seq)
        {
            if (seq == null)
            {
                throw new ArgumentNullException("seq");
            }

            var sourceAlphabet = seq.ActiveAlphabet;
            var isStrictSource = sourceAlphabet == AlphabetType.StrictDna || sourceAlphabet == AlphabetType.StrictRna;

            // Only the strict alphabets enforce whole codons.
            if (isStrictSource && seq.Sequence.Length % 3 != 0)
            {
                throw new ArgumentException("Sequence length is not evenly divisible by three, which means it cannot be translated because you are using a strict nucleotide alphabet");
            }

            // Ambiguous nucleotide alphabets map to the extended protein alphabet;
            // every other source alphabet maps to the standard protein alphabet.
            AlphabetType alphabet;
            switch (sourceAlphabet)
            {
            case AlphabetType.AmbiguousDna:
            case AlphabetType.AmbiguousRna:
                alphabet = AlphabetType.ExtendedProtein;
                break;

            default:
                alphabet = AlphabetType.StandardProtein;
                break;
            }

            // Work on a private copy of the provider's table.
            var translationTable = new Dictionary <string, AminoAcid>(AlphabetDataProvider.GetTranslationTable(seq.GeneticCode, sourceAlphabet));

            // The resulting sequence is a protein, so it reports the protein alphabet
            // (the same alphabet handed to ProteinAlphabet), not the nucleotide alphabet
            // it was translated from.
            ActiveAlphabet   = alphabet;
            _proteinAlphabet = new ProteinAlphabet(alphabet, seq.GeneticCode);

            // NOTE(review): AllowedSymbols is not assigned by this constructor, while the
            // string-based constructor does assign it — confirm whether translated
            // sequences need it populated.

            var proteinBlob = Translate(seq.Sequence, translationTable);

            Sequence     = proteinBlob.Sequence;
            _aminoCounts = proteinBlob.AminoCounts;
        }
        /// <summary>
        /// Builds a protein sequence from raw text, validating every character against
        /// the symbols allowed by the requested protein alphabet and counting each amino acid.
        /// </summary>
        /// <param name="rawSequence">
        /// Amino acid characters. Leading and trailing whitespace is trimmed; characters are
        /// upper-cased (invariant) before validation and are stored upper-cased.
        /// </param>
        /// <param name="desiredProteinAlphabet">Must be <c>ExtendedProtein</c> or <c>StandardProtein</c>.</param>
        /// <param name="geneticCode">Genetic code passed to the <c>ProteinAlphabet</c>; defaults to <c>Standard</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="rawSequence"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="desiredProteinAlphabet"/> is not a protein alphabet, or the sequence
        /// contains a character that is not in the allowed symbols.
        /// </exception>
        public ProteinSequence(string rawSequence, AlphabetType desiredProteinAlphabet, GeneticCode geneticCode = GeneticCode.Standard)
        {
            if (rawSequence == null)
            {
                throw new ArgumentNullException("rawSequence");
            }

            if (desiredProteinAlphabet != AlphabetType.ExtendedProtein && desiredProteinAlphabet != AlphabetType.StandardProtein)
            {
                throw new ArgumentException(String.Format(ProteinAlphabet.InvalidProteinAlphabet, desiredProteinAlphabet));
            }

            _proteinAlphabet = new ProteinAlphabet(desiredProteinAlphabet, geneticCode);
            ActiveAlphabet   = desiredProteinAlphabet;
            AllowedSymbols   = AlphabetDataProvider.GetAllowedProteinSymbols(ActiveAlphabet);

            // Start every allowed symbol at zero so the counts cover the whole alphabet.
            _aminoCounts = new Dictionary <AminoAcid, long>(AllowedSymbols.Count);
            foreach (var symbol in AllowedSymbols)
            {
                _aminoCounts.Add(symbol, 0);
            }

            var normalized = rawSequence.Trim().ToCharArray();

            for (var i = 0; i < normalized.Length; i++)
            {
                var aminoCharacter = normalized[i];
                var upperCharacter = Char.ToUpperInvariant(aminoCharacter);
                var typedAmino     = (AminoAcid)upperCharacter;

                if (!AllowedSymbols.Contains(typedAmino))
                {
                    // Report the character exactly as the caller supplied it.
                    throw new ArgumentException(String.Format(_invalidAminoAcidCharacter, aminoCharacter, _proteinAlphabet));
                }

                _aminoCounts[typedAmino]++;

                // Store the same upper-cased symbol that was validated and counted, so the
                // stored sequence only ever contains characters from AllowedSymbols.
                normalized[i] = upperCharacter;
            }

            Sequence = new string(normalized);
        }
        /// <summary>
        /// Prepares a consensus matrix for a set of sequences that all share one length
        /// and one alphabet. The matrix holds one zero-initialised counter array
        /// (one slot per sequence position) for every character of that alphabet.
        /// </summary>
        /// <param name="sequences">Sequences to build the consensus from; enumerated once and copied to a list.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// No sequences were supplied, the sequences differ in length or alphabet,
        /// or the alphabet type is not a known nucleotide or protein alphabet.
        /// </exception>
        public ConsensusBuilder(IEnumerable <ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            var materialized = sequences.ToList();

            // Without at least one sequence there is no length or alphabet to validate against.
            if (materialized.Count == 0)
            {
                throw new ArgumentException("At least one sequence is required to build a consensus", "sequences");
            }

            _sequences = materialized;

            // The first sequence defines the length and alphabet every other one must match.
            var firstElement = materialized[0];

            _sequenceLength = firstElement.Sequence.Length;
            _alphabet       = firstElement.ActiveAlphabet;

            foreach (var sequence in materialized)
            {
                if (sequence.Sequence.Length != _sequenceLength)
                {
                    throw new ArgumentException("Sequences were not all of uniform length");
                }

                if (_alphabet != sequence.ActiveAlphabet)
                {
                    throw new ArgumentException("Sequences must all be of the same type");
                }
            }

            char[] alphabetChars;

            switch (_alphabet)
            {
            case AlphabetType.StrictRna:
            case AlphabetType.AmbiguousRna:
            case AlphabetType.StrictDna:
            case AlphabetType.AmbiguousDna:
                alphabetChars = AlphabetDataProvider.GetAllowedNucleotideSymbols(_alphabet)
                                .Select(symbol => (char)symbol)
                                .ToArray();
                break;

            case AlphabetType.ExtendedProtein:
            case AlphabetType.StandardProtein:
                alphabetChars = AlphabetDataProvider.GetAllowedProteinSymbols(_alphabet)
                                .Select(symbol => (char)symbol)
                                .ToArray();
                break;

            default:
                throw new ArgumentException("Unknown alphabet type");
            }

            _consensusMatrix = new Dictionary <char, int[]>(alphabetChars.Length);

            foreach (var character in alphabetChars)
            {
                _consensusMatrix.Add(character, new int[_sequenceLength]);
            }
        }