/// <summary>
/// Scores a Wregex match against the PSSM held in <c>m_pssm</c>.
/// </summary>
/// <param name="w">Match whose <c>Groups</c> are scored and whose <c>Match.Length</c> normalises the sum.</param>
/// <returns>
/// 0.0 when <c>m_pssm</c> is empty; otherwise 10 raised to (summed score / <c>w.Match.Length</c>), times 100.
/// </returns>
/// <exception cref="ApplicationException">
/// A PSSM entry inspected while searching for a group's entry has an <c>order</c> that is not below the group count.
/// </exception>
public double GetScore(WregexResult w) {
    if (m_pssm.Count == 0) {
        return 0.0;
    }

    double total = 0.0;

    for (int group = 0; group < w.Groups.Count; group++) {
        // Linear search for the first PSSM entry assigned to this group.
        // Only entries visited before the hit are range-checked.
        int entry = 0;
        while (entry < m_pssm.Count && m_pssm[entry].order != group) {
            if (m_pssm[entry].order >= w.Groups.Count) {
                throw new ApplicationException("Regex and PSSM have diferent lengths");
            }
            entry++;
        }

        // Groups with no PSSM entry contribute nothing to the sum.
        if (entry < m_pssm.Count) {
            int length = w.Groups[group].Length;
            for (int pos = 0; pos < length; pos++) {
                total += GetScore(m_pssm[entry], AminoAcid.Get((w.Groups[group])[pos]));
            }
        }
    }

    // The sum is divided by the length of the whole match, not of each group.
    double exponent = total / w.Match.Length;
    return Math.Pow(10, exponent) * 100;
}
/// <summary>
/// Reads a gzip-compressed, comma-separated file, looks up each distinct id (column 0)
/// through the SNP eFetch service, and collects the protein-level ("NP_") HGVS variants
/// it reports. Each referenced protein is fetched once through the sequence eFetch service
/// and cached; finally every collected <c>Fasta</c> is dumped with <c>Dump(true)</c>.
/// </summary>
/// <param name="file">Path of the gzip-compressed input file.</param>
/// <remarks>
/// A line is processed only when it has at least five columns, column 2 contains '/',
/// column 3 is non-empty and columns 3 and 4 are equal. Progress is written to the console.
/// </remarks>
public static void runEnsembl(string file) {
    char[] sep1 = new char[] { ',' };
    char[] sep2 = new char[] { ':' };
    string line;
    string[] fields, fields2;
    String mut, prot;
    Variant v;
    Fasta f;
    // Set membership keeps the duplicate-id check constant-time per line.
    HashSet<string> ids = new HashSet<string>();
    SortedList<string, Fasta> fasta = new SortedList<string, Fasta>();
    eFetchSnpService ssrv = new eFetchSnpService();
    EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest sreq = new EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest();
    EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchResult sres;
    eFetchSequenceService psrv = new eFetchSequenceService();
    EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest preq = new EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest();
    EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchResult pres;

    // Open read-only (the two-argument FileStream constructor asks for read/write access)
    // and dispose the whole stream chain even when a lookup throws.
    using (StreamReader rd = new StreamReader(new GZipStream(new FileStream(file, FileMode.Open, FileAccess.Read), CompressionMode.Decompress))) {
        while ((line = rd.ReadLine()) != null) {
            fields = line.Split(sep1);

            // Blank or truncated lines do not have the columns inspected below.
            if (fields.Length < 5) {
                continue;
            }
            if (!fields[2].Contains("/") || fields[3].Length == 0 || fields[3] != fields[4]) {
                continue;
            }

            // Add returns false when the id was already seen: query each id only once.
            if (!ids.Add(fields[0])) {
                continue;
            }

            sreq.id = fields[0];
            sres = ssrv.run_eFetch(sreq);
            if (sres == null || sres.ExchangeSet == null || sres.ExchangeSet.Rs == null
                || sres.ExchangeSet.Rs.Length == 0 || sres.ExchangeSet.Rs[0].hgvs == null) {
                continue;
            }

            Console.WriteLine(fields[0] + "...");
            foreach (string str in sres.ExchangeSet.Rs[0].hgvs) {
                // Only protein-level entries are of interest.
                if (str == null || !str.StartsWith("NP_")) {
                    continue;
                }
                Console.Write(str + " ");

                fields2 = str.Split(sep2);
                if (fields2.Length < 2) {
                    // No "accession:change" separator, so there is nothing to parse.
                    Console.WriteLine("(filtered)");
                    continue;
                }
                prot = fields2[0];
                mut = fields2[1];

                v = new Variant();
                v.id = str;
                try {
                    // The change is read as: 2 skipped chars, 3-letter original residue,
                    // position digits, 3-letter mutated residue (e.g. "p.Ala123Val").
                    v.orig = AminoAcid.Get(mut.Substring(2, 3)).Letter;
                    v.mut = AminoAcid.Get(mut.Substring(mut.Length - 3, 3)).Letter;
                    ulong oneBased = ulong.Parse(mut.Substring(5, mut.Length - 8));
                    if (oneBased == 0) {
                        // Subtracting 1 from 0 would wrap around to ulong.MaxValue.
                        Console.WriteLine("(filtered)");
                        continue;
                    }
                    v.pos = oneBased - 1;
                } catch (Exception) {
                    // Deliberate best effort: anything that does not parse as a simple
                    // substitution is skipped rather than aborting the whole run.
                    Console.WriteLine("(filtered)");
                    continue;
                }

                if (fasta.TryGetValue(prot, out f)) {
                    Console.WriteLine("(cached)");
                } else {
                    preq.db = "protein";
                    preq.id = prot;
                    pres = psrv.run_eFetch(preq);
                    if (pres == null || pres.GBSet == null || pres.GBSet.Length == 0) {
                        // The service returned no sequence record for this accession.
                        Console.WriteLine("(unavailable)");
                        continue;
                    }
                    f = new Fasta(Fasta.Type.Protein, prot + "|" + pres.GBSet[0].GBSeq_definition, pres.GBSet[0].GBSeq_sequence);
                    fasta.Add(prot, f);
                    Console.WriteLine("(downloaded)");
                }
                f.mVariants.Add(v);
            }
        }
    }

    foreach (Fasta fas in fasta.Values) {
        fas.Dump(true);
    }
}