示例#1
0
        /// <summary>
        /// Scores a Wregex result against the entries stored in <c>m_pssm</c>.
        /// </summary>
        /// <remarks>
        /// For every group index of <paramref name="w"/>, the first PSSM entry whose
        /// <c>order</c> equals that index is looked up; groups with no such entry are
        /// skipped. Each element of a matched group is converted with
        /// <c>AminoAcid.Get</c> and its per-entry score is added to a running sum.
        /// The result is <c>10^(sum / w.Match.Length) * 100</c>.
        /// </remarks>
        /// <param name="w">Result whose <c>Groups</c> and <c>Match</c> are scored.</param>
        /// <returns>
        /// <c>0.0</c> when <c>m_pssm</c> is empty; otherwise the value described above.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// A PSSM entry inspected while searching for a group has an <c>order</c>
        /// that is not smaller than the number of groups.
        /// </exception>
        public double GetScore(WregexResult w)
        {
            // Nothing to score against.
            if (m_pssm.Count == 0)
            {
                return 0.0;
            }

            double total = 0.0;

            for (int groupIndex = 0; groupIndex < w.Groups.Count; groupIndex++)
            {
                // Linear search for the entry bound to this group; every entry that is
                // passed over on the way is sanity-checked against the group count.
                int entry = 0;
                while (entry < m_pssm.Count && m_pssm[entry].order != groupIndex)
                {
                    if (m_pssm[entry].order >= w.Groups.Count)
                    {
                        throw new ApplicationException("Regex and PSSM have diferent lengths");
                    }
                    entry++;
                }

                // No entry is bound to this group: it contributes nothing.
                if (entry == m_pssm.Count)
                {
                    continue;
                }

                int residues = w.Groups[groupIndex].Length;
                for (int pos = 0; pos < residues; pos++)
                {
                    total += GetScore(m_pssm[entry], AminoAcid.Get((w.Groups[groupIndex])[pos]));
                }
            }

            // The sum is normalised by the length of the whole match, not per group.
            return Math.Pow(10, total / w.Match.Length) * 100;
        }
示例#2
0
        /// <summary>
        /// Reads a gzip-compressed, comma-separated file, resolves each distinct id in
        /// its first column through the SNP eFetch service, and attaches the
        /// protein-level variants it reports to per-protein <c>Fasta</c> entries,
        /// which are finally dumped.
        /// </summary>
        /// <remarks>
        /// A line is processed only when it has at least five fields, field 2 contains
        /// a '/', field 3 is non-empty and fields 3 and 4 are equal. Each id is
        /// requested once. Of the returned <c>hgvs</c> strings only those starting with
        /// "NP_" are used; they are split on ':' into a protein id and a mutation
        /// string. Protein sequences are downloaded once and cached by protein id.
        /// Progress is written to the console.
        /// </remarks>
        /// <param name="file">Path of the gzip-compressed input file.</param>
        public static void runEnsembl(string file)
        {
            char[] sep1 = new char[] { ',' };
            char[] sep2 = new char[] { ':' };

            // Set membership keeps the "already requested" test O(1) per line.
            HashSet <string> ids = new HashSet <string>();
            SortedList <string, Fasta> fasta = new SortedList <string, Fasta>();
            eFetchSnpService           ssrv  = new eFetchSnpService();

            EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest sreq = new EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest();
            EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchResult  sres;
            eFetchSequenceService psrv = new eFetchSequenceService();

            EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest preq = new EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest();
            EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchResult  pres;

            // Disposing the reader also closes the gzip and file streams beneath it,
            // so the file handle is released even if a service call throws.
            using (StreamReader rd = new StreamReader(new GZipStream(new FileStream(file, FileMode.Open), CompressionMode.Decompress)))
            {
                string line;
                while ((line = rd.ReadLine()) != null)
                {
                    string[] fields = line.Split(sep1);

                    // Blank or truncated lines cannot satisfy the filter below.
                    if (fields.Length < 5)
                    {
                        continue;
                    }
                    if (!fields[2].Contains("/") || fields[3].Length == 0 || fields[3] != fields[4])
                    {
                        continue;
                    }

                    // Add returns false when the id was already seen.
                    if (!ids.Add(fields[0]))
                    {
                        continue;
                    }

                    sreq.id = fields[0];
                    sres    = ssrv.run_eFetch(sreq);
                    if (sres == null || sres.ExchangeSet.Rs == null || sres.ExchangeSet.Rs.Length == 0 ||
                        sres.ExchangeSet.Rs[0].hgvs == null)
                    {
                        continue;
                    }
                    Console.WriteLine(fields[0] + "...");

                    foreach (string str in sres.ExchangeSet.Rs[0].hgvs)
                    {
                        if (!str.StartsWith("NP_", StringComparison.Ordinal))
                        {
                            continue;
                        }
                        Console.Write(str + " ");

                        // Expected form is "<protein>:<mutation>"; anything else is
                        // filtered instead of raising an index error.
                        string[] parts = str.Split(sep2);
                        if (parts.Length < 2)
                        {
                            Console.WriteLine("(filtered)");
                            continue;
                        }
                        string prot = parts[0];
                        string mut  = parts[1];

                        Variant v = new Variant();
                        v.id = str;
                        try {
                            // Offsets presume a mutation shaped like "p.Xxx123Yyy":
                            // three letters at index 2, three letters at the end, and
                            // the position in between. The parsed position is
                            // decremented by one before being stored.
                            v.orig = AminoAcid.Get(mut.Substring(2, 3)).Letter;
                            v.mut  = AminoAcid.Get(mut.Substring(mut.Length - 3, 3)).Letter;
                            v.pos  = ulong.Parse(mut.Substring(5, mut.Length - 8)) - 1;
                        } catch (Exception) {
                            // Deliberate best-effort: unparsable mutations are skipped.
                            Console.WriteLine("(filtered)");
                            continue;
                        }

                        Fasta f;
                        if (fasta.TryGetValue(prot, out f))
                        {
                            Console.WriteLine("(cached)");
                        }
                        else
                        {
                            preq.db = "protein";
                            preq.id = prot;
                            pres    = psrv.run_eFetch(preq);
                            if (pres == null || pres.GBSet == null || pres.GBSet.Length == 0)
                            {
                                // No sequence came back; skip this variant rather than crash.
                                Console.WriteLine("(unavailable)");
                                continue;
                            }
                            f = new Fasta(Fasta.Type.Protein, prot + "|" + pres.GBSet[0].GBSeq_definition, pres.GBSet[0].GBSeq_sequence);
                            fasta.Add(prot, f);
                            Console.WriteLine("(downloaded)");
                        }
                        f.mVariants.Add(v);
                    }
                }
            }

            foreach (Fasta fas in fasta.Values)
            {
                fas.Dump(true);
            }
        }