//http://rosalind.info/problems/pdst/
/// <summary>
/// Reads the FASTA records from the dataset file and prints their p-distance matrix:
/// entry (i, j) is the fraction of positions at which strings i and j differ, written
/// with five decimals and followed by a space, one matrix row per output line.
/// </summary>
/// <remarks>
/// Every pair is compared over the length of the first record, so all records are
/// expected to be at least that long.
/// </remarks>
public CreatingADistanceMatrix()
{
    string[] dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToArray();
    int stringLength = dnaStrings[0].Length;

    for (int i = 0; i < dnaStrings.Length; i++)
    {
        for (int j = 0; j < dnaStrings.Length; j++)
        {
            int mismatches = 0;

            // The diagonal is always zero, so the comparison is skipped there.
            if (i != j)
            {
                for (int k = 0; k < stringLength; k++)
                {
                    if (dnaStrings[i][k] != dnaStrings[j][k])
                    {
                        mismatches++;
                    }
                }
            }

            double pDistance = (double)mismatches / stringLength;

            // Invariant culture keeps the '.' decimal separator regardless of the machine's locale.
            Console.Write(pDistance.ToString("0.00000", System.Globalization.CultureInfo.InvariantCulture) + " ");
        }

        Console.Write(Environment.NewLine);
    }
}
//http://rosalind.info/problems/lcsm/
/// <summary>
/// Reads the FASTA records from the dataset file and prints one longest substring
/// that occurs in every record. Nothing is printed when no common substring exists.
/// </summary>
/// <remarks>
/// Candidates are taken from the shortest record, longest first and, for equal
/// lengths, by ascending start position. They are generated on demand, so the full
/// quadratic set of substrings is never held in memory.
/// </remarks>
public FindingASharedMotif()
{
    List<string> input = File.ReadAllLines(@"C:\code\dataset.txt").ToList();
    List<string> dnaStrings = FASTAToDictionary.Convert(input).Values.OrderBy(s => s.Length).ToList();
    string shortest = dnaStrings[0];

    for (int length = shortest.Length; length >= 1; length--)
    {
        for (int start = 0; start + length <= shortest.Length; start++)
        {
            string candidate = shortest.Substring(start, length);

            if (dnaStrings.All(s => s.Contains(candidate)))
            {
                Console.WriteLine(candidate);
                return;
            }
        }
    }
}
//http://rosalind.info/problems/edit/
/// <summary>
/// Reads the first two FASTA records from the dataset file, passes them to
/// <c>Distance</c> together with the index of each string's last character and an
/// empty dictionary keyed by index pairs, and prints the returned value.
/// </summary>
public EditDistance()
{
    List<string> lines = File.ReadAllLines(@"C:\code\dataset.txt").ToList();
    List<string> dnaStrings = FASTAToDictionary.Convert(lines).Values.ToList();

    string first = dnaStrings[0];
    string second = dnaStrings[1];

    // Presumably used by Distance to remember already computed index pairs - confirm against Distance.
    var lookup = new Dictionary<KeyValuePair<int, int>, int>();

    int changeCount = Distance(first, second, first.Length - 1, second.Length - 1, lookup);
    Console.WriteLine(changeCount);
}
//http://rosalind.info/problems/splc/
/// <summary>
/// Reads the FASTA records from the dataset file, removes every occurrence of records
/// 2..n from the first record, replaces 'T' with 'U', and prints the result translated
/// codon by codon until <c>ConvertCodon</c> returns "Stop".
/// </summary>
/// <remarks>
/// Translation also ends when fewer than three symbols remain. Previously a sequence
/// without a stop codon kept passing a buffer of NUL characters to <c>ConvertCodon</c>.
/// </remarks>
public RnaSplicing()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
    string dnaString = dnaStrings[0];

    for (int i = 1; i < dnaStrings.Count; i++)
    {
        dnaString = dnaString.Replace(dnaStrings[i], "");
    }

    dnaString = dnaString.Replace('T', 'U');

    using (var stringReader = new StringReader(dnaString))
    {
        char[] buffer = new char[3];

        // ReadBlock returns the number of characters read; anything short of a full codon ends the translation.
        while (stringReader.ReadBlock(buffer, 0, 3) == 3)
        {
            string aa = new string(buffer).ConvertCodon();

            if (aa == "Stop")
            {
                break;
            }

            Console.Write(aa);
        }
    }
}
//http://rosalind.info/problems/lcsq/
/// <summary>
/// Reads the first two FASTA records from the dataset file and calls <c>blu</c> on
/// them once for each of the 24 orderings of the symbols A, C, G and T, in the fixed
/// order listed below.
/// </summary>
public FindingASharedSplicedMotifAlternative2()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

    string[] orderings =
    {
        "ACGT", "ACTG", "AGCT", "AGTC", "ATGC", "ATCG",
        "CAGT", "CATG", "CGAT", "CGTA", "CTAG", "CTGA",
        "GCAT", "GCTA", "GACT", "GATC", "GTCA", "GTAC",
        "TCGA", "TCAG", "TAGC", "TACG", "TGAC", "TGCA"
    };

    foreach (string ordering in orderings)
    {
        blu(dnaStrings[0], dnaStrings[1], ordering);
    }
}
//http://rosalind.info/problems/kmp/
/// <summary>
/// Reads the first FASTA record from the dataset file and, for every position i,
/// prints (space separated) the length of the longest prefix of the string that also
/// ends at position i, searched among lengths not exceeding the previous value plus one.
/// </summary>
public SpeedingUpMotifFinding()
{
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;
    int matched = 0;

    for (int i = 0; i < input.Length; i++)
    {
        if (i != 0 && input[matched] == input[i])
        {
            // The current border extends by one symbol.
            matched++;
        }
        else if (matched != 0)
        {
            // Shrink the candidate until the prefix of that length equals the text ending at i.
            // A length of zero always matches, so the loop terminates.
            int candidate = matched;
            while (input.Substring(0, candidate) != input.Substring(i - candidate + 1, candidate))
            {
                candidate--;
            }

            matched = candidate;
        }

        Console.Write(matched + " ");
    }
}
//http://rosalind.info/problems/pmch/
/// <summary>
/// Reads the first FASTA record from the dataset file and prints (number of 'A')! *
/// (number of 'G')!, the closed-form count of perfect matchings for an RNA string
/// with as many 'A' as 'U' and as many 'G' as 'C'.
/// </summary>
/// <remarks>
/// The product is computed with <c>System.Numerics.BigInteger</c> because it exceeds
/// 64 bits for realistic inputs. The earlier version only multiplied two hard-coded
/// constants in a <c>ulong</c>, which overflowed, and printed an empty line.
/// On .NET Framework the project must reference System.Numerics.dll.
/// </remarks>
public PerfectMatchingAndRnaSecondaryStructuresAlternative()
{
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;
    _perfectLength = input.Length / 2;

    // NOTE(review): the result was never used; the call is kept only in case
    // ValidConnections has side effects - confirm and remove if it is pure.
    ValidConnections(input);

    int adenines = input.Count(c => c == 'A');
    int guanines = input.Count(c => c == 'G');

    System.Numerics.BigInteger result = System.Numerics.BigInteger.One;

    for (int factor = 2; factor <= adenines; factor++)
    {
        result *= factor;
    }

    for (int factor = 2; factor <= guanines; factor++)
    {
        result *= factor;
    }

    Console.WriteLine(result);
}
//http://rosalind.info/problems/kmer/
/// <summary>
/// Reads the first FASTA record from the dataset file and prints, space separated,
/// how often each 4-mer occurs in it.
/// </summary>
/// <remarks>
/// The alphabet always contains A, C, G and T (plus any other symbol found in the
/// input), so the table keeps its full size even when a nucleotide is absent from the
/// data. The table itself is filled by <c>Solve</c>; the output order is whatever
/// order <c>Solve</c> inserts the keys in (presumably lexicographic - confirm).
/// </remarks>
public KMerComposition()
{
    const int k = 4;

    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
    string dna = dnaStrings[0];

    List<string> symbolList = "ACGT".Concat(dna).Distinct().Select(c => c.ToString()).ToList();
    symbolList.Sort();

    var kmers = new Dictionary<string, int>();
    Solve(kmers, symbolList, "", k);

    for (int i = 0; i <= dna.Length - k; i++)
    {
        kmers[dna.Substring(i, k)]++;
    }

    Console.Write(string.Join(" ", kmers.Values));
}
//http://rosalind.info/problems/lcsq/
/// <summary>
/// Reads the first two FASTA records from the dataset file, runs <c>dod</c> on them,
/// then prints the <c>_subsequence</c> field followed by the line "done".
/// </summary>
public FindingASharedSplicedMotif()
{
    var records = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList());
    List<string> dnaStrings = records.Values.ToList();

    string first = dnaStrings[0];
    string second = dnaStrings[1];

    dod(first, second);

    Console.WriteLine(_subsequence);
    Console.WriteLine("done");
}
//http://rosalind.info/problems/long/
/// <summary>
/// Reads the FASTA records from the dataset file and greedily assembles them into one
/// superstring, which is printed. Starting from the first record, each round attaches
/// the read with the longest overlap at the front and the read with the longest
/// overlap at the back; only overlaps longer than half of a read are considered.
/// </summary>
/// <exception cref="InvalidOperationException">
/// No remaining read overlaps either end of the superstring by more than half of its length.
/// </exception>
public GenomeAssemblyAsShortestSuperstring()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
    string superstring = dnaStrings[0];
    dnaStrings.RemoveAt(0);

    while (dnaStrings.Any())
    {
        int bestPrefixLength = 0;
        int bestSuffixLength = 0;
        int bestPrefix = -1;
        int bestSuffix = -1;

        for (int i = 0; i < dnaStrings.Count; i++)
        {
            string read = dnaStrings[i];
            int minimumOverlap = read.Length / 2;

            // Try overlaps from longest to shortest; stop once neither end can still be improved.
            for (int overlap = read.Length;
                 overlap > minimumOverlap && (overlap > bestPrefixLength || overlap > bestSuffixLength);
                 overlap--)
            {
                // The read's tail matches the start of the superstring: the read goes in front.
                if (overlap > bestPrefixLength
                    && superstring.StartsWith(read.Substring(read.Length - overlap), StringComparison.Ordinal))
                {
                    bestPrefixLength = overlap;
                    bestPrefix = i;
                }

                // The read's head matches the end of the superstring: the read goes at the back.
                if (overlap > bestSuffixLength
                    && superstring.EndsWith(read.Substring(0, overlap), StringComparison.Ordinal))
                {
                    bestSuffixLength = overlap;
                    bestSuffix = i;
                }
            }
        }

        if (bestPrefix < 0 && bestSuffix < 0)
        {
            throw new InvalidOperationException(
                "No remaining read overlaps the assembled superstring by more than half of its length.");
        }

        // The same read must not be attached to both ends (and removed twice); keep its longer overlap.
        if (bestPrefix == bestSuffix)
        {
            if (bestPrefixLength >= bestSuffixLength)
            {
                bestSuffix = -1;
            }
            else
            {
                bestPrefix = -1;
            }
        }

        if (bestPrefix >= 0)
        {
            superstring = dnaStrings[bestPrefix] + superstring.Substring(bestPrefixLength);
        }

        if (bestSuffix >= 0)
        {
            superstring = superstring + dnaStrings[bestSuffix].Substring(bestSuffixLength);
        }

        // Remove the higher index first so the lower index stays valid.
        dnaStrings.RemoveAt(Math.Max(bestPrefix, bestSuffix));

        if (Math.Min(bestPrefix, bestSuffix) >= 0)
        {
            dnaStrings.RemoveAt(Math.Min(bestPrefix, bestSuffix));
        }
    }

    Console.WriteLine(superstring);
}
//http://rosalind.info/problems/corr/
/// <summary>
/// Reads the FASTA records from the dataset file and prints a line "old->new" for
/// every read seen only once that lies at Hamming distance 1 from a confirmed read.
/// A read is confirmed once it, or the value returned for it by
/// <c>ComplementingAStrandOfDna.ReverseCompliment</c>, has been seen at least twice;
/// both orientations of a confirmed read are accepted as correction targets.
/// </summary>
/// <remarks>
/// A read that equals its own reverse complement is stored once only, so it cannot
/// produce duplicate correction lines.
/// </remarks>
public ErrorCorrectionInReads()
{
    string[] dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToArray();
    var valid = new List<string>();
    var invalid = new List<string>();

    foreach (string dnaString in dnaStrings)
    {
        string reverse = ComplementingAStrandOfDna.ReverseCompliment(dnaString, false);

        // Already confirmed in either orientation: nothing to do.
        if (valid.Contains(dnaString) || valid.Contains(reverse))
        {
            continue;
        }

        int seenAt = invalid.FindIndex(r => r.Equals(dnaString) || r.Equals(reverse));

        if (seenAt >= 0)
        {
            // Second sighting: promote both orientations to the confirmed list.
            valid.Add(dnaString);

            if (!reverse.Equals(dnaString))
            {
                valid.Add(reverse);
            }

            invalid.RemoveAt(seenAt);
        }
        else
        {
            invalid.Add(dnaString);
        }
    }

    foreach (string inv in invalid)
    {
        foreach (string val in valid)
        {
            if (CountingPointMutations.GetHammingDistance(inv, val) == 1)
            {
                Console.WriteLine(inv + "->" + val);
            }
        }
    }
}
//http://rosalind.info/problems/pmch/
/// <summary>
/// Reads the first FASTA record from the dataset file, stores half its length in
/// <c>_perfectLength</c>, lets <c>Matches</c> fill a list of index-pair lists, and
/// prints how many entries that list ends up with.
/// </summary>
public PerfectMatchingAndRnaSecondaryStructures()
{
    List<string> lines = File.ReadAllLines(@"C:\code\dataset.txt").ToList();
    string input = FASTAToDictionary.Convert(lines).First().Value;

    _perfectLength = input.Length / 2;

    var matchings = new List<List<KeyValuePair<int, int>>>();
    var current = new List<KeyValuePair<int, int>>();

    Matches(input, matchings, current);

    Console.WriteLine(matchings.Count);
}
//http://rosalind.info/problems/sseq/
/// <summary>
/// Reads the first two FASTA records from the dataset file and prints, space
/// separated, the 1-based positions in the first record at which the symbols of the
/// second record are found, each search starting right after the previous hit.
/// </summary>
/// <remarks>
/// When a symbol is not found, <c>IndexOf</c> returns -1, so 0 is printed and the next
/// search restarts from the beginning of the first record.
/// </remarks>
public FindingASplicedMotif()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

    string motif = dnaStrings[1];
    int position = 0;

    for (int m = 0; m < motif.Length; m++)
    {
        int found = dnaStrings[0].IndexOf(motif[m], position);
        position = found + 1;
        Console.Write(position + " ");
    }
}
//http://rosalind.info/problems/ctea/
/// <summary>
/// Reads the first two FASTA records from the dataset file into <c>_s1</c> and
/// <c>_s2</c>, lets <c>EditDistanceAlignment.GenerateAlignmentPathPairs</c> fill a
/// dictionary of index pairs, and prints the value <c>CountPaths</c> returns when
/// started from the last key of that dictionary.
/// </summary>
public CountingOptimalAlignments()
{
    List<string> lines = File.ReadAllLines(@"C:\code\dataset.txt").ToList();
    List<string> dnaStrings = FASTAToDictionary.Convert(lines).Values.ToList();

    var pairs = new Dictionary<KeyValuePair<int, int>, KeyValuePair<int, int>>();
    _s1 = dnaStrings[0];
    _s2 = dnaStrings[1];

    // Only the filled dictionary is needed here; the returned value is not used.
    EditDistanceAlignment.GenerateAlignmentPathPairs(_s1, _s2, _s1.Length - 1, _s2.Length - 1, pairs);

    KeyValuePair<int, int> lastKey = pairs.Last().Key;
    var pathCounts = new Dictionary<KeyValuePair<int, int>, ulong>();

    ulong count = CountPaths(pairs, lastKey, pathCounts);
    Console.WriteLine(count);
}
//http://rosalind.info/problems/lcsq/
/// <summary>
/// Reads the first two FASTA records from the dataset file, runs <c>dod</c> and then
/// <c>reconstruct</c> over them (both starting from each string's last index and
/// sharing one dictionary), reverses the string returned by <c>reconstruct</c>, and
/// prints its length followed by the string itself.
/// </summary>
public FindingASharedSplicedMotifStolen()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

    string first = dnaStrings[0];
    string second = dnaStrings[1];
    int lastOfFirst = first.Length - 1;
    int lastOfSecond = second.Length - 1;

    var pairs = new Dictionary<KeyValuePair<int, int>, int>();
    dod(first, second, lastOfFirst, lastOfSecond, pairs);

    string sequence = reconstruct(first, second, lastOfFirst, lastOfSecond, pairs);

    // reconstruct's result is reversed before it is printed.
    sequence = new string(sequence.Reverse().ToArray());

    Console.WriteLine(sequence.Length);
    Console.WriteLine(sequence);
}
//http://rosalind.info/problems/glob/
/// <summary>
/// Reads the first two FASTA records from the dataset file, lets
/// <c>GenerateAlignmentPathPairs</c> fill a dictionary of index pairs, hands
/// character-list copies of both strings to <c>AddHyphens</c> (starting at the
/// dictionary's last key), and prints the returned score followed by both lists
/// joined back into strings.
/// </summary>
public GlobalAlignmentWithScoringMatrix()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

    string first = dnaStrings[0];
    string second = dnaStrings[1];

    var pairs = new Dictionary<KeyValuePair<int, int>, KeyValuePair<int, int>>();
    List<char> s1Chars = first.ToList();
    List<char> s2Chars = second.ToList();

    int changeCount = GenerateAlignmentPathPairs(first, second, first.Length - 1, second.Length - 1, pairs);
    AddHyphens(s1Chars, s2Chars, pairs, pairs.Last().Key);

    Console.WriteLine(changeCount);
    Console.WriteLine(new string(s1Chars.ToArray()));
    Console.WriteLine(new string(s2Chars.ToArray()));
}
//http://rosalind.info/problems/cons/
/// <summary>
/// Reads the FASTA records from the dataset file, counts the symbols A, C, G and T in
/// every column, prints the most frequent symbol of each column as one line (ties go
/// to the earlier symbol in the order A, C, G, T), and then passes the four count
/// arrays to <c>WriteCharacterArray</c>.
/// </summary>
public ConcensusAndProfile()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();
    int width = dnaStrings[0].Length;

    int[] A = new int[width];
    int[] C = new int[width];
    int[] G = new int[width];
    int[] T = new int[width];

    for (int column = 0; column < width; column++)
    {
        // Tally this column across all records.
        foreach (string dna in dnaStrings)
        {
            switch (dna[column])
            {
                case 'A': A[column]++; break;
                case 'C': C[column]++; break;
                case 'G': G[column]++; break;
                case 'T': T[column]++; break;
            }
        }

        // A strict '>' means an earlier symbol wins a tie.
        char best = 'A';
        int bestCount = A[column];

        if (C[column] > bestCount)
        {
            best = 'C';
            bestCount = C[column];
        }

        if (G[column] > bestCount)
        {
            best = 'G';
            bestCount = G[column];
        }

        if (T[column] > bestCount)
        {
            best = 'T';
        }

        Console.Write(best);
    }

    Console.Write(Environment.NewLine);

    WriteCharacterArray(A, 'A');
    WriteCharacterArray(C, 'C');
    WriteCharacterArray(G, 'G');
    WriteCharacterArray(T, 'T');
}
//http://rosalind.info/problems/orf/
/// <summary>
/// Reads the first FASTA record from the dataset file, replaces 'T' with 'U', and
/// runs <c>FindProteinOpenFrames</c> on the string and on its suffixes starting at
/// offsets 1 and 2. The same is then done for the value returned by
/// <c>ComplementingAStrandOfDna.ReverseCompliment</c>. Finally every distinct
/// <c>Protein</c> value collected is printed on its own line.
/// </summary>
public OpenReadingFrames()
{
    var proteins = new List<ProteinString>();
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value.Replace('T', 'U');

    // Three frames on the given strand.
    for (int offset = 0; offset < 3; offset++)
    {
        FindProteinOpenFrames(proteins, input.Substring(offset));
    }

    input = ComplementingAStrandOfDna.ReverseCompliment(input, true);

    // Three frames on the other strand.
    for (int offset = 0; offset < 3; offset++)
    {
        FindProteinOpenFrames(proteins, input.Substring(offset));
    }

    foreach (var protein in proteins.Select(p => p.Protein).Distinct().ToList())
    {
        Console.WriteLine(protein);
    }
}
//http://rosalind.info/problems/cat/
/// <summary>
/// Reads the first FASTA record from the dataset file and, for every odd index i,
/// adds the value <c>CatalanNumbers</c> returns for the pair (0, i) to a running
/// total kept modulo 1,000,000. The total is printed.
/// </summary>
public CatalanNumbersAndRnaSecondaryStructures()
{
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;
    List<char> nodes = input.ToList();
    var store = new Dictionary<KeyValuePair<int, int>, long>();

    long result = 0;
    int partner = 1;

    while (partner < nodes.Count)
    {
        long contribution = CatalanNumbers(store, nodes, new KeyValuePair<int, int>(0, partner), partner);
        result = (result + contribution) % 1000000;
        partner += 2;
    }

    Console.WriteLine(result);
}
//http://rosalind.info/problems/grph/
/// <summary>
/// Reads the FASTA records from the dataset file and prints "source target" (record
/// names) for every ordered pair of records whose sequences differ and where the last
/// three symbols of the source equal the first three symbols of the target.
/// </summary>
public OverlapGraphs()
{
    Dictionary<string, string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList());

    foreach (KeyValuePair<string, string> source in dnaStrings)
    {
        foreach (KeyValuePair<string, string> target in dnaStrings)
        {
            // Records with identical sequences (including a record paired with itself) are skipped.
            if (source.Value.Equals(target.Value))
            {
                continue;
            }

            string suffix = source.Value.Substring(source.Value.Length - 3, 3);
            string prefix = target.Value.Substring(0, 3);

            if (suffix.Equals(prefix))
            {
                Console.WriteLine(source.Key + " " + target.Key);
            }
        }
    }
}
//http://rosalind.info/problems/lcsq/
/// <summary>
/// Reads the first two FASTA records from the dataset file and, for every start index
/// of the first record from which more symbols remain than <c>_result</c> currently
/// holds, calls <c>dod</c> three times (last argument 1, 2 and 3) on fresh character
/// lists of both records. Afterwards prints <c>_index</c>, the length of
/// <c>_result</c>, and <c>_result</c> itself.
/// </summary>
public FindingASharedSplicedMotifAlternative()
{
    List<string> dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToList();

    for (int start = 0; start < dnaStrings[0].Length; start++)
    {
        // Too few symbols remain from this start to exceed the current result's length.
        if (dnaStrings[0].Length - start <= _result.Length)
        {
            continue;
        }

        dod(new List<char>(dnaStrings[0]), new List<char>(dnaStrings[1]), start, 1);
        dod(new List<char>(dnaStrings[0]), new List<char>(dnaStrings[1]), start, 2);
        dod(new List<char>(dnaStrings[0]), new List<char>(dnaStrings[1]), start, 3);
    }

    Console.WriteLine(_index + "(" + _result.Length + ")" + " done: ");
    Console.WriteLine(_result);
}
//http://rosalind.info/problems/motz/
/// <summary>
/// Reads the first FASTA record from the dataset file and, for every odd index i,
/// adds the value <c>MotzkinNumbers</c> returns for the pair (0, i) to a running
/// total kept modulo 1,000,000. One more is added for the matching with no
/// connections, and the sum is printed modulo 1,000,000.
/// </summary>
/// <remarks>
/// The final "+ 1" is reduced modulo 1,000,000 as well; previously a running total of
/// 999,999 was printed as 1,000,000 instead of 0.
/// </remarks>
public MotzkinNumbersAndRnaSecondaryStructures()
{
    //Add all the most obvious ones as far as you can one at a time, then step back and retread all the way through
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;
    List<char> nodes = input.ToCharArray().ToList();
    var store = new Dictionary<KeyValuePair<int, int>, long>();
    long result = 0;

    // NOTE(review): only odd partner indices are tried for node 0; whether even ones
    // are covered inside MotzkinNumbers cannot be seen from here - confirm.
    for (int i = 1; i < nodes.Count; i += 2)
    {
        var pair = new KeyValuePair<int, int>(0, i);
        long rtn = MotzkinNumbers(store, nodes, pair, i);
        result = (result + rtn) % 1000000;
    }

    // Add one for no connections, staying inside the modulus.
    Console.WriteLine((1 + result) % 1000000);
}
//http://rosalind.info/problems/revp/
/// <summary>
/// Reads the first FASTA record from the dataset file and prints "position length"
/// (1-based position) for every substring of length 4 to 12 that equals the
/// corresponding span of the string returned by
/// <c>ComplementingAStrandOfDna.ReverseCompliment</c>.
/// </summary>
public LocatingRestrictionSites()
{
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;
    string reversed = ComplementingAStrandOfDna.ReverseCompliment(input, false);
    int total = input.Length;

    for (int start = 0; start < total - 3; start++)
    {
        // Stop growing the window once it would run past the end of the input.
        for (int length = 4; length <= 12 && start + length <= total; length++)
        {
            string forward = input.Substring(start, length);

            // The same span, located from the other end of the reversed string.
            string mirrored = reversed.Substring(total - (start + length), length);

            if (forward.Equals(mirrored))
            {
                Console.WriteLine(start + 1 + " " + length);
            }
        }
    }
}
//http://rosalind.info/problems/mmch/
/// <summary>
/// Reads the first FASTA record from the dataset file and prints the number of
/// maximum matchings of its bases: for each complementary pair (G/C and A/U) the
/// product max * (max - 1) * ... * (max - min + 1) of the two symbol counts, with
/// both products multiplied together.
/// </summary>
/// <remarks>
/// The product is accumulated in <c>System.Numerics.BigInteger</c>; the earlier
/// <c>UInt64</c> arithmetic wrapped around silently once the result exceeded 64 bits.
/// On .NET Framework the project must reference System.Numerics.dll.
/// </remarks>
public MaximumMatchingsAndRnaSecondaryStructures()
{
    string input = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).First().Value;

    int G = input.Count(c => c == 'G');
    int C = input.Count(c => c == 'C');
    int A = input.Count(c => c == 'A');
    int U = input.Count(c => c == 'U');

    int[][] complementaryCounts =
    {
        new[] { G, C },
        new[] { A, U }
    };

    System.Numerics.BigInteger result = System.Numerics.BigInteger.One;

    foreach (int[] counts in complementaryCounts)
    {
        int larger = Math.Max(counts[0], counts[1]);
        int smaller = Math.Min(counts[0], counts[1]);

        // Each of the 'smaller' bases picks a distinct partner among the 'larger' ones.
        for (int factor = larger - smaller + 1; factor <= larger; factor++)
        {
            result *= factor;
        }
    }

    Console.WriteLine(result);
}
//http://rosalind.info/problems/tran/
/// <summary>
/// Reads the first two FASTA records from the dataset file, compares them position by
/// position over the length of the first record, and prints the ratio of transitions
/// to transversions. A mismatch counts as a transition when <c>_transitions</c> maps
/// the first record's symbol to the second record's symbol; every other mismatch
/// counts as a transversion.
/// </summary>
/// <remarks>
/// The ratio is written with the invariant culture so the decimal separator is always
/// '.'. With no transversions the floating-point division yields infinity (or NaN
/// when there are no transitions either) rather than throwing.
/// </remarks>
public TransitionsAndTransversions()
{
    string[] dnaStrings = FASTAToDictionary.Convert(File.ReadAllLines(@"C:\code\dataset.txt").ToList()).Values.ToArray();
    double transitions = 0;
    double transversions = 0;

    for (int i = 0; i < dnaStrings[0].Length; i++)
    {
        char a = dnaStrings[0][i];
        char b = dnaStrings[1][i];

        if (_transitions.ContainsKey(a) && _transitions[a] == b)
        {
            transitions++;
        }
        else if (a != b)
        {
            transversions++;
        }
    }

    double ratio = transitions / transversions;
    Console.WriteLine(ratio.ToString(System.Globalization.CultureInfo.InvariantCulture));
}