示例#1
0
        /// <summary>
        /// Generates profiles from one single sequence.
        /// The set of sequence items of the seq should be the same as
        /// 'static ItemSet' of this class.
        /// The result has one row per sequence position (RowSize is the sequence length)
        /// and one column per entry of ItemSet (ColumnSize is ItemSet.Count).
        /// An unambiguous item adds one to its own column; an ambiguous item adds one to
        /// the column of every item it maps to in AmbiguousCharactersMap.
        /// Rows are not normalized by this method.
        /// </summary>
        /// <param name="seq">an input sequence</param>
        /// <returns>the profiles built from the input sequence</returns>
        /// <exception cref="ArgumentNullException">seq is null</exception>
        /// <exception cref="Exception">
        /// an IndexOutOfRangeException occurred while filling a row ("Invalid alphabet");
        /// the original exception is attached as the inner exception
        /// </exception>
        public static IProfiles GenerateProfiles(ISequence seq)
        {
            if (seq == null)
            {
                throw new ArgumentNullException("seq");
            }

            int sequenceLength = seq.Count;
            int colSize        = ItemSet.Count;

            IProfiles profiles = new Profiles(sequenceLength, colSize);

            for (int i = 0; i < sequenceLength; ++i)
            {
                try
                {
                    // Read the item once; it is used for the ambiguity test and every lookup below.
                    var item = seq[i];

                    if (item.IsAmbiguous)
                    {
                        // An ambiguous item counts once for each item it may stand for.
                        var candidates = AmbiguousCharactersMap[item];
                        for (int b = 0; b < candidates.Count; ++b)
                        {
                            ++(profiles[i][ItemSet[candidates[b]]]);
                        }
                    }
                    else
                    {
                        ++(profiles[i][ItemSet[item]]);
                    }
                }
                catch (IndexOutOfRangeException ex)
                {
                    // Pass the caught exception itself (not ex.InnerException, which is
                    // normally null) so the root cause and stack trace are preserved.
                    throw new Exception("Invalid alphabet", ex);
                }
            }

            profiles.ColumnSize = colSize;
            profiles.RowSize    = sequenceLength;
            return profiles;
        }
示例#2
0
        /// <summary>
        /// Generate IProfiles from a subset of aligned sequences.
        /// In the subset of sequences, those columns containing no residues,
        /// i.e. indels only, are discarded and their positions are reported
        /// through <paramref name="allIndelPositions"/>.
        /// For every kept column, each selected sequence adds its weight to the column
        /// of its item in ItemSet (or, for an ambiguous item, to the column of every item
        /// it maps to in AmbiguousCharactersMap); the column vector is then passed to
        /// MsaUtils.Normalize before being appended to the profiles.
        /// </summary>
        /// <param name="sequences">a set of aligned sequences</param>
        /// <param name="sequenceIndices">the subset indices of the aligned sequences</param>
        /// <param name="allIndelPositions">receives the column positions that contained only indels and were left out of the profiles</param>
        /// <param name="weights">
        /// sequence weights, indexed by sequence index. The array is passed to
        /// MsaUtils.Normalize before use (presumably modified in place - confirm against MsaUtils).
        /// </param>
        /// <returns>profiles with one row per kept column and ItemSet.Count columns</returns>
        /// <exception cref="ArgumentNullException">sequences, sequenceIndices or weights is null</exception>
        /// <exception cref="ArgumentException">
        /// the sequence list is empty, the subset is empty or larger than the sequence list,
        /// the selected sequences differ in length, or they use different alphabets
        /// </exception>
        /// <exception cref="Exception">
        /// an IndexOutOfRangeException occurred while building the profiles ("Invalid index");
        /// the original exception is attached as the inner exception
        /// </exception>
        public static IProfiles GenerateProfiles(List <ISequence> sequences, List <int> sequenceIndices, out List <int> allIndelPositions, float[] weights)
        {
            IProfiles profiles;

            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }
            if (sequenceIndices == null)
            {
                throw new ArgumentNullException("sequenceIndices");
            }
            if (weights == null)
            {
                throw new ArgumentNullException("weights");
            }

            if (sequences.Count <= 0)
            {
                throw new ArgumentException("Empty input sequences");
            }
            // An empty subset is rejected here; otherwise sequenceIndices[0] below would
            // fail with an unrelated out-of-range error from the list indexer.
            if (sequenceIndices.Count == 0 || sequenceIndices.Count > sequences.Count)
            {
                throw new ArgumentException("Invalid subset indices");
            }

            MsaUtils.Normalize(weights);

            try
            {
                // The first selected sequence defines the expected length and alphabet.
                ISequence reference      = sequences[sequenceIndices[0]];
                int       sequenceLength = reference.Count;
                IAlphabet alphabet       = reference.Alphabet;

                foreach (int i in sequenceIndices)
                {
                    if (sequences[i].Count != sequenceLength)
                    {
                        throw new ArgumentException("Input sequences are not aligned");
                    }
                    if (sequences[i].Alphabet != alphabet)
                    {
                        throw new ArgumentException("Input sequences use different alphabets");
                    }
                }

                allIndelPositions = new List <int>();

                profiles = new Profiles();
                int colSize = ItemSet.Count;

                // Discard all indels columns.
                for (int col = 0; col < sequenceLength; ++col)
                {
                    float[] vector      = new float[colSize];
                    bool    isAllIndels = true;

                    foreach (int i in sequenceIndices)
                    {
                        // Read the item once; it is used for the gap/ambiguity tests and lookups.
                        var item = sequences[i][col];

                        if (!item.IsGap)
                        {
                            isAllIndels = false;
                        }

                        if (item.IsAmbiguous)
                        {
                            // An ambiguous item contributes its weight to every item it may stand for.
                            var candidates = AmbiguousCharactersMap[item];
                            for (int b = 0; b < candidates.Count; ++b)
                            {
                                vector[ItemSet[candidates[b]]] += weights[i];
                            }
                        }
                        else
                        {
                            vector[ItemSet[item]] += weights[i];
                        }
                    }

                    if (isAllIndels)
                    {
                        // Column holds only gaps in the selected subset: skip it but remember where it was.
                        allIndelPositions.Add(col);
                    }
                    else
                    {
                        MsaUtils.Normalize(vector);
                        profiles.ProfilesMatrix.Add(vector);
                    }
                }

                profiles.ColumnSize = colSize;
                profiles.RowSize    = profiles.ProfilesMatrix.Count;
            }
            catch (IndexOutOfRangeException ex)
            {
                // Pass the caught exception itself (not ex.InnerException, which is
                // normally null) so the root cause and stack trace are preserved.
                throw new Exception("Invalid index", ex);
            }

            return profiles;
        }