Exemple #1
0
        /*
         * Build every rotation of string s and use them as the rows of a matrix,
         * sort the rows of that matrix lexically, and then take the last character of each row: that sequence is the BWT.
         *
         */
        /// <summary>
        /// Computes the Burrows-Wheeler transform of <paramref name="s"/> by sorting the
        /// rows of its rotation matrix and reading off the first and last columns.
        /// </summary>
        /// <param name="s">Text to transform; must not be null.</param>
        /// <returns>
        /// A <c>BWTransformation</c> whose <c>L</c> holds the last character of every sorted row,
        /// <c>F</c> the first character of every sorted row, <c>rps</c> the <c>rp</c> value of
        /// every sorted row, and <c>inputidx</c> the <c>rp</c> value of the first sorted row.
        /// </returns>
        private static BWTransformation BWTransform(string s)
        {
            int l = s.Length;

            // AllRotations presumably yields an l-by-l matrix holding one rotation of s per row,
            // which Char2DArrToStringArr flattens into l strings - TODO confirm against the helpers.
            char[,]  ar   = AllRotations(s);
            string[] rows = Useful.Char2DArrToStringArr(ar, l, l);

            // Pair every row with its pre-sort index so that index is still known after sorting
            // (assumes SortBuffer exposes the constructor arguments as its s and rp members).
            List<SortBuffer> sl = new List<SortBuffer>(l);
            for (int i = 0; i < l; i++)
            {
                sl.Add(new SortBuffer(rows[i], i));
            }

            // Sort the rows by raw character value. The default string comparer is culture-sensitive,
            // so the row order (and therefore the transform) would depend on the current culture and
            // could treat distinct rows as equal; an ordinal comparison is deterministic everywhere.
            sl = sl.OrderBy(sb => sb.s, StringComparer.Ordinal).ToList();

            // The sorted matrix is complete: collect its last column (L), first column (F)
            // and the pre-sort index of each row.
            char[] r   = new char[l];
            char[] f   = new char[l];
            int[]  rps = new int[l];
            for (int i = 0; i < l; i++)
            {
                SortBuffer row = sl[i];
                r[i]   = row.s[l - 1];
                f[i]   = row.s[0];
                rps[i] = row.rp;
            }

            BWTransformation tr = new BWTransformation();

            tr.L        = Useful.CharToString(r, l);
            tr.F        = new string(f);
            tr.inputidx = sl[0].rp;
            tr.rps      = Useful.ArrayCopy<int>(rps, l);
            return tr;
        }
        /// <summary>
        /// Searches <paramref name="Seqs"/> for a common motif of length <paramref name="W"/> by
        /// repeatedly picking one sequence at random and moving its motif start to the position
        /// that scores best against the weight matrix returned by MakeWeightMatrixForStringSeq.
        /// </summary>
        /// <param name="Seqs">Input sequences; must contain at least one element.</param>
        /// <param name="L">Length of one sequence.</param>
        /// <param name="W">Motif length; must satisfy 1 &lt;= W &lt;= L.</param>
        /// <returns>
        /// A <c>GibbsMotifData</c> built from the motif found in the last sampled sequence, the last
        /// weight matrix, and the original arguments.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="Seqs"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="Seqs"/> is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="W"/> is outside 1..L.</exception>
        public static GibbsMotifData ForStringSeq(string[] Seqs, int L, int W)
        {
            if (Seqs == null)
            {
                throw new ArgumentNullException("Seqs");
            }

            int N = Seqs.Length;
            if (N == 0)
            {
                throw new ArgumentException("At least one sequence is required.", "Seqs");
            }
            if (W < 1 || W > L)
            {
                throw new ArgumentOutOfRangeException("W", W, "Motif length must be between 1 and L.");
            }

            // setA[i] is the current motif start index in sequence i; start from random positions
            // chosen so that a window of length W fits inside a sequence of length L.
            int[]  setA = new int[N];
            Random rnd  = new Random();
            for (int i = 0; i < N; i++)
            {
                setA[i] = rnd.Next(L - W + 1);
            }

            int    pa;   // start index the sampled sequence had before the current update
            string seq1; // sequence sampled in the current iteration
            WeightMatrixForGibbs<char> wmat;
            bool   converged;

            do
            {
                int seq1idx = rnd.Next(N);
                seq1 = Seqs[seq1idx];
                wmat = MakeWeightMatrixForStringSeq(Seqs, N, W, seq1idx, setA);
                pa   = setA[seq1idx];

                int a = wmat.ArgMaxScore(seq1.ToCharArray(), N, L);
                setA[seq1idx] = a;

                // Computed once per iteration and reused for both the progress output and the
                // stop test (assumes MotifMax and CutString have no side effects - TODO confirm).
                string consensus = Useful.CharToString(wmat.MotifMax(), W);
                string window    = CutString(seq1, a, a + W - 1);

                Console.WriteLine(consensus);
                Console.WriteLine(window);
                // NOTE(review): waits for a line on standard input after every iteration; this
                // looks like a debugging pause and is kept only to preserve existing behavior.
                Console.ReadLine();

                // Stop once the start index no longer moves AND the matrix's best motif equals the
                // window at that index. The second requirement is risky: it is fine when the data
                // contains an exact motif (like "shungke"), but with a weak motif it may never hold,
                // in which case this loop does not terminate.
                converged = (a == pa) && (consensus == window);
            } while (!converged);

            // On exit pa equals the final start index of the last sampled sequence.
            string motif = seq1.Substring(pa, W);

            return new GibbsMotifData(motif, wmat, Seqs, N, L, W);
        }