예제 #1
0
        //http://rosalind.info/problems/pdst/

        /// <summary>
        /// Reads the FASTA records from the dataset file and prints a square matrix in which
        /// entry (i, j) is the fraction of positions where record i and record j differ.
        /// Each value is written with five decimals followed by a space, one row per line.
        /// </summary>
        public CreatingADistanceMatrix()
        {
            string[] dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToArray();

            if (dnaStrings.Length == 0)
            {
                // No records: the matrix is empty, so there is nothing to print.
                return;
            }

            // The length of the first record is used for every comparison.
            int stringLength = dnaStrings[0].Length;

            for (int i = 0; i < dnaStrings.Length; i++)
            {
                for (int j = 0; j < dnaStrings.Length; j++)
                {
                    int mismatches = 0;

                    // The diagonal is always zero, so skip the scan there.
                    if (i != j)
                    {
                        for (int k = 0; k < stringLength; k++)
                        {
                            if (dnaStrings[i][k] != dnaStrings[j][k])
                            {
                                mismatches++;
                            }
                        }
                    }

                    // Floating-point division on purpose; the invariant culture guarantees a '.'
                    // decimal separator regardless of the machine's regional settings.
                    double pDistance = mismatches / (double)stringLength;
                    Console.Write(pDistance.ToString("0.00000", System.Globalization.CultureInfo.InvariantCulture) + " ");
                }
                Console.Write(Environment.NewLine);
            }
        }
예제 #2
0
        //http://rosalind.info/problems/lcsm/

        /// <summary>
        /// Prints one longest substring that occurs in every FASTA record of the dataset file.
        /// Candidates are taken from the shortest record, longest first and leftmost first within
        /// a length, and the first candidate contained in all records is printed.
        /// Nothing is printed when no common substring exists.
        /// </summary>
        public FindingASharedMotif()
        {
            List<string> input      = File.ReadAllLines(@"C:\code\dataset.txt").ToList();
            List<string> dnaStrings = FASTAToDictionary.Convert(input).Values.OrderBy(s => s.Length).ToList();
            string       shortest   = dnaStrings[0];

            // Enumerate candidates lazily instead of materialising and sorting every substring:
            // the visiting order (length descending, start ascending) matches a stable
            // descending sort of the full substring list, without the cubic memory cost.
            for (int length = shortest.Length; length >= 1; length--)
            {
                for (int start = 0; start + length <= shortest.Length; start++)
                {
                    string candidate = shortest.Substring(start, length);

                    if (dnaStrings.All(s => s.Contains(candidate)))
                    {
                        Console.WriteLine(candidate);
                        return;
                    }
                }
            }
        }
예제 #3
0
        //http://rosalind.info/problems/edit/

        /// <summary>
        /// Reads the first two FASTA records from the dataset file, passes them to
        /// <c>Distance</c> together with the index of each record's last character and an
        /// empty lookup table, and prints the returned value.
        /// </summary>
        public EditDistance()
        {
            string[] lines   = File.ReadAllLines(@"C:\code\dataset.txt");
            var      records = FASTAToDictionary.Convert(lines.ToList()).Values.ToList();

            string first  = records[0];
            string second = records[1];

            // Starts empty; presumably used by Distance to cache sub-results (confirm in Distance).
            var table = new Dictionary<KeyValuePair<int, int>, int>();

            int changeCount = Distance(first, second, first.Length - 1, second.Length - 1, table);

            Console.WriteLine(changeCount);
        }
예제 #4
0
        //http://rosalind.info/problems/splc/

        /// <summary>
        /// Removes every occurrence of each later FASTA record from the first record, replaces
        /// 'T' with 'U', then translates the result three characters at a time with
        /// <c>ConvertCodon</c>, printing each translation until one equals "Stop".
        /// Translation also ends when fewer than three characters remain.
        /// </summary>
        public RnaSplicing()
        {
            List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
            string       dnaString  = dnaStrings[0];

            foreach (string intron in dnaStrings.Skip(1))
            {
                dnaString = dnaString.Replace(intron, "");
            }

            dnaString = dnaString.Replace('T', 'U');

            // Walk whole codons only. Bounding the loop by the string length means a sequence
            // without a stop codon (or with a trailing partial codon) ends cleanly instead of
            // handing NUL-padded buffers to ConvertCodon.
            for (int offset = 0; offset + 3 <= dnaString.Length; offset += 3)
            {
                string aa = dnaString.Substring(offset, 3).ConvertCodon();

                if (aa == "Stop")
                {
                    break;
                }

                Console.Write(aa);
            }
        }
예제 #5
0
        //http://rosalind.info/problems/lcsq/

        /// <summary>
        /// Calls <c>blu</c> on the first two FASTA records once for each of the 24 orderings of
        /// the symbols A, C, G and T, in the fixed order listed below.
        /// </summary>
        public FindingASharedSplicedMotifAlternative2()
        {
            List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

            // The call order is significant only if blu keeps state between calls, so the
            // sequence is kept exactly as it was written out by hand.
            string[] symbolOrders =
            {
                "ACGT", "ACTG", "AGCT", "AGTC", "ATGC", "ATCG",
                "CAGT", "CATG", "CGAT", "CGTA", "CTAG", "CTGA",
                "GCAT", "GCTA", "GACT", "GATC", "GTCA", "GTAC",
                "TCGA", "TCAG", "TAGC", "TACG", "TGAC", "TGCA"
            };

            foreach (string symbolOrder in symbolOrders)
            {
                blu(dnaStrings[0], dnaStrings[1], symbolOrder);
            }
        }
예제 #6
0
        //http://rosalind.info/problems/kmp/

        /// <summary>
        /// Prints, for every position i of the first FASTA record, the length of the longest
        /// proper prefix of the record that is also a suffix of the text ending at i.
        /// Each value is followed by a single space.
        /// </summary>
        public SpeedingUpMotifFinding()
        {
            string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;

            int[] failure = new int[input.Length];
            int   matched = 0;
            var   output  = new System.Text.StringBuilder();

            for (int i = 0; i < input.Length; i++)
            {
                // Position 0 has no proper prefix, so its value stays 0.
                if (i != 0)
                {
                    // On a mismatch fall back through already computed borders instead of
                    // re-comparing substrings; this keeps the whole scan linear.
                    while (matched > 0 && input[matched] != input[i])
                    {
                        matched = failure[matched - 1];
                    }

                    if (input[matched] == input[i])
                    {
                        matched++;
                    }
                }

                failure[i] = matched;
                output.Append(matched).Append(' ');
            }

            // One write for the whole table rather than one console call per value.
            Console.Write(output.ToString());
        }
예제 #7
0
        //http://rosalind.info/problems/pmch/

        /// <summary>
        /// Unfinished variant: stores half the length of the first FASTA record in
        /// <c>_perfectLength</c>, calls <c>ValidConnections</c> on the record, and writes an
        /// empty line. The remaining locals are computed but never used.
        /// </summary>
        public PerfectMatchingAndRnaSecondaryStructuresAlternative()
        {
            string[] lines = File.ReadAllLines(@"C:\code\dataset.txt");
            string   input = FASTAToDictionary.Convert(lines.ToList()).First().Value;

            _perfectLength = input.Length / 2;

            List<List<int>> validConnections = ValidConnections(input);
            List<List<int>> perfectMatchings = new List<List<int>>();

            ulong guanines = (ulong)input.Count(symbol => symbol == 'G');
            ulong adenines = (ulong)input.Count(symbol => symbol == 'A');

            // Scratch arithmetic left over from experimentation. The product is larger than
            // ulong.MaxValue, so it wraps around unless the project is compiled as checked.
            ulong left    = 121645100408832;
            ulong right   = 20922789888;
            ulong product = right * left;

            // NOTE(review): earlier drafts (previously commented out here) called
            // PerfectMatchings(validConnections, ...) or printed Factorial(G) * Factorial(A);
            // neither is executed, so only a blank line is produced.
            Console.WriteLine( );
        }
예제 #8
0
        //http://rosalind.info/problems/kmer/

        /// <summary>
        /// Collects the distinct symbols of the first FASTA record, sorts them, lets
        /// <c>Solve</c> populate a dictionary for words of length 4, increments the entry for
        /// every length-4 window of the record, and prints the dictionary values separated by
        /// spaces.
        /// </summary>
        public KMerComposition()
        {
            const int wordLength = 4;

            List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
            string       sequence   = dnaStrings[0];

            // Distinct keeps first-occurrence order; the sort below then fixes the final order.
            List<string> symbolList = sequence.Distinct().Select(symbol => symbol.ToString()).ToList();

            symbolList.Sort();

            var kmers = new Dictionary<string, int>();

            // Presumably seeds one zero entry per possible word; the indexer increment below
            // throws if a window is missing from the dictionary.
            Solve(kmers, symbolList, "", wordLength);

            for (int start = 0; start + wordLength <= sequence.Length; start++)
            {
                string window = sequence.Substring(start, wordLength);
                kmers[window]++;
            }

            Console.Write(string.Join(" ", kmers.Values));
        }
예제 #9
0
        //http://rosalind.info/problems/lcsq/

        /// <summary>
        /// Runs <c>dod</c> on the first two FASTA records of the dataset file, then prints the
        /// <c>_subsequence</c> field followed by the line "done".
        /// </summary>
        public FindingASharedSplicedMotif()
        {
            string[] lines   = File.ReadAllLines(@"C:\code\dataset.txt");
            var      records = FASTAToDictionary.Convert(lines.ToList()).Values.ToList();

            dod(records[0], records[1]);

            // _subsequence is read only after dod returns; presumably dod fills it in.
            Console.WriteLine(_subsequence);
            Console.WriteLine("done");
        }
        //http://rosalind.info/problems/long/

        /// <summary>
        /// Greedily assembles the FASTA records into one superstring and prints it. Starting
        /// from the first record, each round looks for the read whose tail overlaps the start of
        /// the superstring, and the read whose head overlaps its end, by more than half the
        /// read's length; the best of each is glued on and removed from the pool.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No remaining read overlaps the current superstring by more than half its length.
        /// </exception>
        public GenomeAssemblyAsShortestSuperstring()
        {
            List<string> dnaStrings  = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
            string       superstring = dnaStrings[0];

            dnaStrings.RemoveAt(0);

            while (dnaStrings.Any())
            {
                int bestPrefixLength = 0;
                int bestSuffixLength = 0;
                int bestPrefix       = -1;
                int bestSuffix       = -1;

                for (int i = 0; i < dnaStrings.Count; i++)
                {
                    string read    = dnaStrings[i];
                    int    overlap = read.Length;

                    // Shrink the overlap until it can no longer beat either current best.
                    // After a hit the loop re-tests the same overlap; the "greater than best"
                    // guards then fail for that side and the other side or the decrement runs.
                    while (overlap > read.Length / 2 && (overlap > bestPrefixLength || overlap > bestSuffixLength))
                    {
                        if (overlap > bestPrefixLength && superstring.StartsWith(read.Substring(read.Length - overlap), StringComparison.Ordinal))
                        {
                            bestPrefixLength = overlap;
                            bestPrefix       = i;
                        }
                        else if (overlap > bestSuffixLength && superstring.EndsWith(read.Substring(0, overlap), StringComparison.Ordinal))
                        {
                            bestSuffixLength = overlap;
                            bestSuffix       = i;
                        }
                        else
                        {
                            overlap--;
                        }
                    }
                }

                if (bestPrefix < 0 && bestSuffix < 0)
                {
                    // Without this check the removal below would be RemoveAt(-1).
                    throw new InvalidOperationException("No remaining read overlaps the assembled superstring by more than half of its length.");
                }

                if (bestPrefix >= 0)
                {
                    superstring = dnaStrings[bestPrefix] + superstring.Substring(bestPrefixLength);
                }

                // The same read can win on both ends; it must be glued on only once.
                if (bestSuffix >= 0 && bestSuffix != bestPrefix)
                {
                    superstring = superstring + dnaStrings[bestSuffix].Substring(bestSuffixLength);
                }

                // Remove the higher index first so the lower index is still valid afterwards.
                int higher = Math.Max(bestPrefix, bestSuffix);
                int lower  = Math.Min(bestPrefix, bestSuffix);

                dnaStrings.RemoveAt(higher);
                if (lower >= 0 && lower != higher)
                {
                    dnaStrings.RemoveAt(lower);
                }
            }

            Console.WriteLine(superstring);
        }
예제 #11
0
        //http://rosalind.info/problems/corr/

        /// <summary>
        /// Splits the FASTA reads into "valid" reads (seen at least twice, counting a reverse
        /// complement as the same read) and "invalid" reads (seen once), then prints
        /// "invalid->valid" for every invalid/valid pair whose Hamming distance is exactly 1.
        /// </summary>
        public ErrorCorrectionInReads()
        {
            string[] dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToArray();

            var valid   = new List<string>();
            var invalid = new List<string>();

            foreach (string dnaString in dnaStrings)
            {
                string reverse = ComplementingAStrandOfDna.ReverseCompliment(dnaString, false);

                // Already confirmed, either directly or through its reverse complement.
                if (valid.Any(known => dnaString.Equals(known) || reverse.Equals(known)))
                {
                    continue;
                }

                int seenAt = invalid.FindIndex(pending => dnaString.Equals(pending) || reverse.Equals(pending));

                if (seenAt < 0)
                {
                    // First sighting: provisional until a second copy turns up.
                    invalid.Add(dnaString);
                    continue;
                }

                // Second sighting promotes the read and its reverse complement.
                valid.Add(dnaString);
                if (!reverse.Equals(dnaString))
                {
                    // A read that is its own reverse complement must be listed once only,
                    // otherwise every correction towards it would be printed twice.
                    valid.Add(reverse);
                }
                invalid.RemoveAt(seenAt);
            }

            foreach (string inv in invalid)
            {
                foreach (string val in valid)
                {
                    if (CountingPointMutations.GetHammingDistance(inv, val) == 1)
                    {
                        Console.WriteLine(inv + "->" + val);
                    }
                }
            }
        }
        //http://rosalind.info/problems/pmch/

        /// <summary>
        /// Stores half the length of the first FASTA record in <c>_perfectLength</c>, hands the
        /// record to <c>Matches</c> with an empty result list and an empty working list, and
        /// prints how many entries the result list holds afterwards.
        /// </summary>
        public PerfectMatchingAndRnaSecondaryStructures()
        {
            string[] lines = File.ReadAllLines(@"C:\code\dataset.txt");
            string   input = FASTAToDictionary.Convert(lines.ToList()).First().Value;

            _perfectLength = input.Length / 2;

            var result  = new List<List<KeyValuePair<int, int>>>();
            var current = new List<KeyValuePair<int, int>>();

            Matches(input, result, current);

            Console.WriteLine(result.Count);
        }
예제 #13
0
        //http://rosalind.info/problems/sseq/

        /// <summary>
        /// Prints 1-based positions in the first FASTA record at which the characters of the
        /// second record occur in order, each taken as the earliest position after the previous
        /// match. Every position is followed by a single space.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The second record is not a subsequence of the first.
        /// </exception>
        public FindingASplicedMotif()
        {
            List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

            string text  = dnaStrings[0];
            string motif = dnaStrings[1];

            var positions  = new List<int>(motif.Length);
            int searchFrom = 0;

            foreach (char c in motif)
            {
                int found = text.IndexOf(c, searchFrom);

                if (found < 0)
                {
                    // Previously a miss printed 0 and restarted the search from the beginning,
                    // producing positions that were not a valid increasing sequence.
                    throw new InvalidOperationException("The second sequence is not a subsequence of the first: no '" + c + "' found at or after position " + (searchFrom + 1) + ".");
                }

                searchFrom = found + 1; // doubles as the 1-based position of this match
                positions.Add(searchFrom);
            }

            // Written only after the whole motif was located, so a failure leaves no partial output.
            foreach (int position in positions)
            {
                Console.Write(position + " ");
            }
        }
예제 #14
0
        //http://rosalind.info/problems/ctea/

        /// <summary>
        /// Stores the first two FASTA records in <c>_s1</c> and <c>_s2</c>, lets
        /// <c>EditDistanceAlignment.GenerateAlignmentPathPairs</c> fill a pair dictionary for
        /// them, and prints the value <c>CountPaths</c> returns when started from the key of
        /// the dictionary's last enumerated entry.
        /// </summary>
        public CountingOptimalAlignments()
        {
            string[] lines   = File.ReadAllLines(@"C:\code\dataset.txt");
            var      records = FASTAToDictionary.Convert(lines.ToList()).Values.ToList();
            var      pairs   = new Dictionary<KeyValuePair<int, int>, KeyValuePair<int, int>>();

            _s1 = records[0];
            _s2 = records[1];

            // Called only to populate pairs; the value it returns is not needed here.
            EditDistanceAlignment.GenerateAlignmentPathPairs(_s1, _s2, _s1.Length - 1, _s2.Length - 1, pairs);

            // NOTE(review): relies on the dictionary enumerating in insertion order - confirm
            // that the last entry is the intended starting cell.
            KeyValuePair<int, int> start = pairs.Last().Key;
            var                    memo  = new Dictionary<KeyValuePair<int, int>, ulong>();

            ulong count = CountPaths(pairs, start, memo);

            Console.WriteLine(count);
        }
        //http://rosalind.info/problems/lcsq/

        /// <summary>
        /// Runs <c>dod</c> and then <c>reconstruct</c> on the first two FASTA records (both
        /// receive the index of each record's last character and a shared dictionary), reverses
        /// the reconstructed string, and prints its length followed by the string itself.
        /// </summary>
        public FindingASharedSplicedMotifStolen()
        {
            string[] lines   = File.ReadAllLines(@"C:\code\dataset.txt");
            var      records = FASTAToDictionary.Convert(lines.ToList()).Values.ToList();
            var      pairs   = new Dictionary<KeyValuePair<int, int>, int>();

            dod(records[0], records[1], records[0].Length - 1, records[1].Length - 1, pairs);

            string backwards = reconstruct(records[0], records[1], records[0].Length - 1, records[1].Length - 1, pairs);

            // reconstruct's output is printed in reverse character order.
            char[] symbols = backwards.ToCharArray();
            Array.Reverse(symbols);
            string sequence = new string(symbols);

            Console.WriteLine(sequence.Length);
            Console.WriteLine(sequence);
        }
        //http://rosalind.info/problems/glob/

        /// <summary>
        /// Lets <c>GenerateAlignmentPathPairs</c> fill a pair dictionary for the first two FASTA
        /// records, has <c>AddHyphens</c> edit character-list copies of both records starting
        /// from the dictionary's last enumerated key, and prints the value returned by
        /// <c>GenerateAlignmentPathPairs</c> followed by the two edited character lists.
        /// </summary>
        public GlobalAlignmentWithScoringMatrix()
        {
            string[] lines   = File.ReadAllLines(@"C:\code\dataset.txt");
            var      records = FASTAToDictionary.Convert(lines.ToList()).Values.ToList();
            var      pairs   = new Dictionary<KeyValuePair<int, int>, KeyValuePair<int, int>>();

            string first  = records[0];
            string second = records[1];

            // Mutable copies for AddHyphens to work on.
            List<char> s1Chars = first.ToList();
            List<char> s2Chars = second.ToList();

            int changeCount = GenerateAlignmentPathPairs(first, second, first.Length - 1, second.Length - 1, pairs);

            AddHyphens(s1Chars, s2Chars, pairs, pairs.Last().Key);

            Console.WriteLine(changeCount);
            Console.WriteLine(new string(s1Chars.ToArray()));
            Console.WriteLine(new string(s2Chars.ToArray()));
        }
예제 #17
0
        //http://rosalind.info/problems/cons/

        /// <summary>
        /// For every column of the FASTA records, counts the occurrences of A, C, G and T,
        /// writes the most frequent symbol (ties go to the earlier of A, C, G, T), and after a
        /// line break passes the four count arrays to <c>WriteCharacterArray</c>.
        /// </summary>
        public ConcensusAndProfile()
        {
            List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

            // The first record decides how many columns are profiled.
            int width = dnaStrings[0].Length;

            int[] A = new int[width];
            int[] C = new int[width];
            int[] G = new int[width];
            int[] T = new int[width];

            for (int column = 0; column < width; column++)
            {
                foreach (string read in dnaStrings)
                {
                    // Symbols other than A, C, G and T are not counted.
                    switch (read[column])
                    {
                        case 'A': A[column]++; break;
                        case 'C': C[column]++; break;
                        case 'G': G[column]++; break;
                        case 'T': T[column]++; break;
                    }
                }

                // Strict comparisons keep the earliest symbol on a tie.
                char winner = 'A';
                int  best   = A[column];

                if (C[column] > best)
                {
                    winner = 'C';
                    best   = C[column];
                }
                if (G[column] > best)
                {
                    winner = 'G';
                    best   = G[column];
                }
                if (T[column] > best)
                {
                    winner = 'T';
                }

                Console.Write(winner);
            }

            Console.Write(Environment.NewLine);
            WriteCharacterArray(A, 'A');
            WriteCharacterArray(C, 'C');
            WriteCharacterArray(G, 'G');
            WriteCharacterArray(T, 'T');
        }
예제 #18
0
        //http://rosalind.info/problems/orf/

        /// <summary>
        /// Replaces 'T' with 'U' in the first FASTA record, runs <c>FindProteinOpenFrames</c>
        /// on the result at offsets 0, 1 and 2, repeats that on the value returned by
        /// <c>ComplementingAStrandOfDna.ReverseCompliment</c>, and prints each distinct
        /// <c>Protein</c> value collected, one per line.
        /// </summary>
        public OpenReadingFrames()
        {
            int[] frameOffsets = { 0, 1, 2 };

            var      proteins = new List<ProteinString>();
            string[] lines    = File.ReadAllLines(@"C:\code\dataset.txt");
            string   forward  = FASTAToDictionary.Convert(lines.ToList()).First().Value.Replace('T', 'U');

            foreach (int offset in frameOffsets)
            {
                FindProteinOpenFrames(proteins, forward.Substring(offset));
            }

            string backward = ComplementingAStrandOfDna.ReverseCompliment(forward, true);

            foreach (int offset in frameOffsets)
            {
                FindProteinOpenFrames(proteins, backward.Substring(offset));
            }

            // Distinct keeps the first occurrence, so output follows discovery order.
            foreach (var protein in proteins.Select(p => p.Protein).Distinct().ToList())
            {
                Console.WriteLine(protein);
            }
        }
예제 #19
0
        //http://rosalind.info/problems/cat/

        /// <summary>
        /// Sums, modulo 1,000,000, the values <c>CatalanNumbers</c> returns for the pair
        /// (0, i) at every odd index i of the first FASTA record, and prints the total.
        /// </summary>
        public CatalanNumbersAndRnaSecondaryStructures()
        {
            const long modulus = 1000000;

            string[] lines = File.ReadAllLines(@"C:\code\dataset.txt");
            string   input = FASTAToDictionary.Convert(lines.ToList()).First().Value;

            var nodes = new List<char>(input);
            var store = new Dictionary<KeyValuePair<int, int>, long>(); // shared across all calls

            long result  = 0;
            int  partner = 1;

            // Only odd indices are tried as partners of position 0.
            while (partner < nodes.Count)
            {
                long rtn = CatalanNumbers(store, nodes, new KeyValuePair<int, int>(0, partner), partner);

                result   = (result + rtn) % modulus;
                partner += 2;
            }

            Console.WriteLine(result);
        }
예제 #20
0
        //http://rosalind.info/problems/grph/

        /// <summary>
        /// Prints "keyA keyB" for every ordered pair of FASTA records whose sequences differ
        /// and where the last three characters of A's sequence equal the first three
        /// characters of B's sequence.
        /// </summary>
        public OverlapGraphs()
        {
            const int overlap = 3;

            string[]                   lines      = File.ReadAllLines(@"C:\code\dataset.txt");
            Dictionary<string, string> dnaStrings = FASTAToDictionary.Convert(lines.ToList());

            foreach (var tail in dnaStrings)
            {
                foreach (var head in dnaStrings)
                {
                    // Records with identical sequences (including a record paired with itself)
                    // are never linked.
                    if (tail.Value.Equals(head.Value))
                    {
                        continue;
                    }

                    string suffix = tail.Value.Substring(tail.Value.Length - overlap, overlap);
                    string prefix = head.Value.Substring(0, overlap);

                    if (suffix.Equals(prefix))
                    {
                        Console.WriteLine(string.Concat(tail.Key, " ", head.Key));
                    }
                }
            }
        }
        //http://rosalind.info/problems/lcsq/

        /// <summary>
        /// For each start index of the first FASTA record that still leaves more characters
        /// than the current <c>_result</c> holds, calls <c>dod</c> on fresh character-list
        /// copies of the first two records with last argument 1, 2 and 3. Finally prints
        /// <c>_index</c>, the length of <c>_result</c>, and <c>_result</c> itself.
        /// </summary>
        public FindingASharedSplicedMotifAlternative()
        {
            List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

            for (int start = 0; start < dnaStrings[0].Length; start++)
            {
                // A start this late cannot leave more characters than _result already has.
                if (dnaStrings[0].Length - start <= _result.Length)
                {
                    continue;
                }

                for (int variant = 1; variant <= 3; variant++)
                {
                    // New lists on every call, so dod always receives unmodified copies.
                    dod(new List<char>(dnaStrings[0]), new List<char>(dnaStrings[1]), start, variant);
                }
            }

            Console.WriteLine(_index + "(" + _result.Length + ")" + " done: ");
            Console.WriteLine(_result);
        }
예제 #22
0
        //http://rosalind.info/problems/motz/

        /// <summary>
        /// Sums, modulo 1,000,000, the values <c>MotzkinNumbers</c> returns for the pair (0, i)
        /// at every odd index i of the first FASTA record, adds one for the structure without
        /// any connection, and prints the total reduced modulo 1,000,000.
        /// </summary>
        public MotzkinNumbersAndRnaSecondaryStructures()
        {
            //Add all the most obvious ones as far as you can one at a time, then step back and retread all the way through

            const long modulus = 1000000;

            string      input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;
            List <char> nodes = input.ToCharArray().ToList();
            var         store = new Dictionary <KeyValuePair <int, int>, long>();

            long result = 0;

            for (int i = 1; i < nodes.Count; i += 2)
            {
                var  pair = new KeyValuePair <int, int>(0, i);
                long rtn  = MotzkinNumbers(store, nodes, pair, i);
                result = (result + rtn) % modulus;
            }

            // Add one for no connections, and reduce again: a running total of 999999 would
            // otherwise be printed as 1000000 instead of 0.
            Console.WriteLine((1 + result) % modulus);
        }
예제 #23
0
        //http://rosalind.info/problems/revp/

        /// <summary>
        /// Prints "position length" (position is 1-based) for every substring of the first
        /// FASTA record, 4 to 12 characters long, that equals the same-length slice of
        /// <c>ReverseCompliment(input, false)</c> taken at the mirrored offset.
        /// </summary>
        public LocatingRestrictionSites()
        {
            const int shortest = 4;
            const int longest  = 12;

            string[] lines    = File.ReadAllLines(@"C:\code\dataset.txt");
            string   input    = FASTAToDictionary.Convert(lines.ToList()).First().Value;
            string   reversed = ComplementingAStrandOfDna.ReverseCompliment(input, false);

            for (int start = 0; start < input.Length - 3; start++)
            {
                // Never read past the end of the record.
                int limit = Math.Min(longest, input.Length - start);

                for (int length = shortest; length <= limit; length++)
                {
                    string site = input.Substring(start, length);

                    // Offset of the same window counted from the other end of the string.
                    string mirrored = reversed.Substring(input.Length - (start + length), length);

                    if (site.Equals(mirrored))
                    {
                        Console.WriteLine((start + 1) + " " + length);
                    }
                }
            }
        }
        //http://rosalind.info/problems/mmch/

        /// <summary>
        /// Counts the G, C, A and U symbols of the first FASTA record and prints
        /// P(max(G,C), min(G,C)) * P(max(A,U), min(A,U)), where P(n, k) = n * (n-1) * ... * (n-k+1).
        /// The arithmetic uses <see cref="System.Numerics.BigInteger"/> because the product
        /// exceeds 64 bits for all but very short sequences.
        /// </summary>
        public MaximumMatchingsAndRnaSecondaryStructures()
        {
            // NOTE(review): System.Numerics must be referenced by the project (it is by default
            // on .NET Core / .NET 5+; .NET Framework needs the System.Numerics assembly).
            // NOTE(review): this replaces the project's Factorial(max, max - min) helper on the
            // assumption that it computed max! / (max - min)!; confirm against that helper.

            string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;

            int G = input.Count(c => c == 'G');
            int C = input.Count(c => c == 'C');
            int A = input.Count(c => c == 'A');
            int U = input.Count(c => c == 'U');

            System.Numerics.BigInteger Fg     = FallingFactorial(Math.Max(G, C), Math.Min(G, C));
            System.Numerics.BigInteger Fa     = FallingFactorial(Math.Max(A, U), Math.Min(A, U));
            System.Numerics.BigInteger result = Fg * Fa;

            Console.WriteLine(result);
        }

        /// <summary>
        /// Returns n * (n-1) * ... * (n-count+1); the empty product (count == 0) is 1.
        /// </summary>
        private static System.Numerics.BigInteger FallingFactorial(int n, int count)
        {
            System.Numerics.BigInteger product = System.Numerics.BigInteger.One;

            for (int factor = n; factor > n - count; factor--)
            {
                product *= factor;
            }

            return product;
        }
        //http://rosalind.info/problems/tran/

        /// <summary>
        /// Compares the first two FASTA records position by position and prints the ratio of
        /// transitions to transversions. A differing pair counts as a transition when
        /// <c>_transitions</c> maps the first symbol to the second, and as a transversion
        /// otherwise.
        /// </summary>
        public TransitionsAndTransversions()
        {
            string [] dnaStrings    = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToArray();
            double    transitions   = 0;
            double    transversions = 0;

            for (int i = 0; i < dnaStrings[0].Length; i++)
            {
                char a = dnaStrings[0][i];
                char b = dnaStrings[1][i];

                if (_transitions.ContainsKey(a) && _transitions[a] == b)
                {
                    transitions++;
                }
                else if (a != b)
                {
                    transversions++;
                }
            }

            // Counters are doubles, so zero transversions yields Infinity/NaN rather than an
            // exception. The invariant culture guarantees a '.' decimal separator regardless of
            // the machine's regional settings.
            double ratio = transitions / transversions;
            Console.WriteLine(ratio.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }