示例#1
0
        /// <summary>
        /// Reads the FASTA data set from the "Rosalind" folder on the desktop, prints the longest
        /// common substring of its DNA strings and reports how long the search took.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string desktopPath = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);

            // Path.Combine instead of a hard-coded '\' keeps the folder path valid on every platform.
            string[] files = Directory.GetFiles(Path.Combine(desktopPath, "Rosalind"), "*.txt");
            if (files.Length == 0)
            {
                // Without this guard files[0] below fails with IndexOutOfRangeException.
                Console.WriteLine("No .txt data set found in the Rosalind folder.");
                Console.ReadLine();
                return;
            }

            // Only the first .txt file is used; the folder is expected to hold just the data set.
            List <Fasta>  fastas  = Fasta.DecodeFasta(File.ReadAllLines(files[0]));
            List <string> dnaOnly = fastas.Select(f => f.DNA).ToList();

            // Time the search together with printing its result, not the file loading.
            Stopwatch itime = Stopwatch.StartNew();
            Console.WriteLine(LongestCommonSubstring(dnaOnly));
            itime.Stop();
            Console.WriteLine("Took: {0}ms", itime.ElapsedMilliseconds);

            Console.ReadLine();
        }
示例#2
0
        /// <summary>
        /// Event handler that writes an "Invalid Header!" notice to the console, followed by the
        /// description of the FASTA entry carried by the event data.
        /// </summary>
        /// <param name="sender">Object that raised the event (unused).</param>
        /// <param name="e">Event data holding the affected <see cref="Fasta"/> entry.</param>
        static void databaseMaker_OnInvalidHeader(object sender, FastaEvent e)
        {
            // Resolve the entry first so missing event data fails before anything is printed.
            Fasta offendingEntry = e.Fasta;

            Console.WriteLine("\nInvalid Header!");
            Console.WriteLine(offendingEntry.Description);
        }
示例#3
0
        /// <summary>
        /// Reads a FASTA text with a header line and one sequence line and checks that the
        /// sequence is exposed through <c>Dna</c>.
        /// </summary>
        public void PopulateATCGDnaFastaFormat()
        {
            var input  = new StringReader(">DNA\nATCG");
            var reader = new FastaReader(input);

            Fasta record = reader.Read();

            Assert.AreEqual("ATCG", record.Dna);
        }
示例#4
0
        /// <summary>
        /// Reads a FASTA text that consists of a header line only and checks that the text
        /// after '&gt;' is exposed through <c>Header</c>.
        /// </summary>
        public void PopulateDEFHeaderFastaFormat()
        {
            var input  = new StringReader(">DEF");
            var reader = new FastaReader(input);

            Fasta record = reader.Read();

            Assert.AreEqual("DEF", record.Header);
        }
示例#5
0
        /// <summary>
        /// Reads a FASTA record whose sequence spans four lines and checks that <c>Dna</c>
        /// holds the lines joined without separators.
        /// </summary>
        public void PopulateMultiLineDnaFastaFormat()
        {
            var input  = new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT");
            var reader = new FastaReader(input);

            Fasta record = reader.Read();

            Assert.AreEqual("ATCGGGCTAATATCGGGCTAATATCGGGCTAATATCGGGCTAAT", record.Dna);
        }
示例#6
0
        /// <summary>
        /// Loads the GC sample data set, finds the entry with the highest GC ratio and checks the
        /// number of entries, the winning label and its ratio expressed as a percentage rounded
        /// to six decimals.
        /// </summary>
        public void GcContentTests()
        {
            // Arrange
            const int     expectedEntryCount = 3;
            const decimal expectedGcRatio    = 60.919540m;
            const string  expectedFastaLabel = "Rosalind_0808";
            var           fasta              = new Fasta();
            var           highestGcRatio     = 0m;
            var           highestFastaLabel  = "";

            // Act
            var entries = fasta.ReadEntries(@"Data\GC\GC_SampleDataSet.txt");

            foreach (var entry in entries)
            {
                var ratio = entry.Dna.CalculateGcRatio();
                if (ratio > highestGcRatio)
                {
                    highestGcRatio    = ratio;
                    highestFastaLabel = entry.Label;
                }
            }

            // The ratio is compared as a percentage with six decimals.
            highestGcRatio = Math.Round(highestGcRatio * 100, 6);

            // Assert
            // The constraint model is Assert.That(actual, Is.EqualTo(expected)); passing the values
            // the other way round produces misleading "expected/but was" failure messages.
            Assert.That(entries.Count, Is.EqualTo(expectedEntryCount));
            Assert.That(highestGcRatio, Is.EqualTo(expectedGcRatio));
            Assert.That(highestFastaLabel, Is.EqualTo(expectedFastaLabel));
        }
示例#7
0
        /// <summary>
        /// Loads the FASTA data set from the "Rosalind" desktop folder, builds a profile matrix
        /// from its DNA strings and prints the consensus string followed by the matrix.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string   desktop   = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            string[] dataFiles = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

            // Every matching file is read but only the content of the last one is kept;
            // the folder is expected to contain a single data set.
            string[] content = { "" };
            for (int n = 0; n < dataFiles.Length; n++)
            {
                content = File.ReadAllLines(dataFiles[n]);
            }

            // Keep just the DNA string of every decoded record, in file order.
            List <Fasta> records = Fasta.DecodeFasta(content);
            string[]     dna     = records.ConvertAll(record => record.DNA).ToArray();

            ProfileMatrix profile = new ProfileMatrix(dna);

            Console.WriteLine(profile.ConsensusString);
            ProfileMatrix.OutputProfileMatrix(profile);

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
示例#8
0
        /// <summary>
        /// Loads the FASTA data set from the "Rosalind" desktop folder, converts every DNA string
        /// to RNA (T becomes U) and prints the result of <c>TranscribeRNAToProtein</c>, called with
        /// the first string as the main sequence and all remaining strings as introns.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string   desktop   = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            string[] dataFiles = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

            // Every matching file is read but only the content of the last one is kept;
            // the folder is expected to contain a single data set.
            string[] content = { "" };
            for (int n = 0; n < dataFiles.Length; n++)
            {
                content = File.ReadAllLines(dataFiles[n]);
            }

            List <Fasta> records = Fasta.DecodeFasta(content);

            // Transcribe every record once, then split off the first one.
            string[] rna      = records.Select(record => record.DNA.Replace('T', 'U')).ToArray();
            string   sequence = rna[0];
            string[] introns  = rna.Skip(1).ToArray();

            Console.WriteLine(TranscribeRNAToProtein(sequence, introns));

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
示例#9
0
        // Raises the OnInvalidHeader event for the given FASTA entry.
        // The event delegate is first copied into a local variable so that the null
        // check and the invocation operate on the same delegate instance, even if a
        // subscriber is removed in between. When nobody is subscribed (the copy is
        // null) nothing happens; otherwise every registered handler is called with
        // this object as the sender and a FastaEvent wrapping the entry.

        private void InvalidHeader(Fasta fasta)
        {
            var handler = OnInvalidHeader;

            if (handler == null)
            {
                return;
            }

            handler(this, new FastaEvent(fasta));
        }
示例#10
0
文件: Program.cs 项目: ingted/Causal
    /// <summary>
    /// Runs the Fasta benchmark (argument "25000000") under <c>Perf.causalProfiling</c>,
    /// writing a debug message before and after the profiling run.
    /// </summary>
    static void Main(string[] _)
    {
        // NOTE(review): the meaning of the first causalProfiling argument is not visible here.
        const int profilingArgument = 10;

        Dbg.Write("Starting Fasta Profiling");

        // The benchmark is wrapped as an F# function value; a fresh argument array is built per call.
        var workload = FuncConvert.FromAction(() => Fasta.Run(new[] { "25000000" }));
        Perf.causalProfiling(profilingArgument, workload);

        Dbg.Write("Finished Fasta Profiling");
    }
示例#11
0
        /// <summary>
        /// Reads two FASTA records separated by an empty line and checks the header and the
        /// joined sequence of each record in turn.
        /// </summary>
        public void HandleMultipleFastaFormatWithEmptyLines()
        {
            var input  = new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT");
            var reader = new FastaReader(input);

            // First record.
            Fasta first = reader.Read();
            Assert.AreEqual("DNA", first.Header);
            Assert.AreEqual("ATCGGGCTAATATCGGGCTAAT", first.Dna);

            // Second record, read only after the first one has been verified.
            Fasta second = reader.Read();
            Assert.AreEqual("Number2", second.Header);
            Assert.AreEqual("ATCGGGCTAATATCGGGCTAAT", second.Dna);
        }
示例#12
0
        /// <summary>
        /// For every protein id listed in the data set, downloads its FASTA record from UniProt
        /// and prints the id followed by the 1-based start positions of every four-residue window
        /// matching <c>N[^P][ST][^P]</c>. Proteins without any match are not printed.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const int motifLength = 4;

            string strPath = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);

            string[] files = Directory.GetFiles(strPath + @"\Rosalind", "*.txt"); //Assumes only one txt file in this directory - the data set
            string[] lines = { "" };
            foreach (string file in files)
            {
                lines = File.ReadAllLines(file);
            }

            Regex regex = new Regex(@"N[^P][ST][^P]", RegexOptions.Multiline);

            string url = @"http://www.uniprot.org/uniprot/";
            List <(string id, int[] positions)> tuples = new List <(string id, int[] positions)>();

            foreach (string protein in lines)
            {
                // A blank line (or the placeholder used when no file exists) is not a protein id.
                if (string.IsNullOrWhiteSpace(protein))
                {
                    continue;
                }

                List <int> positions = new List <int>();
                string     html      = MakeGetRequest(url + protein + ".fasta");
                Fasta      fasta     = Fasta.DecodeFasta(html.Split('\n'))[0];

                // Slide a window over the sequence one residue at a time so overlapping motifs are
                // found. The bound includes the final window; "i < Length - 4" skipped it.
                for (int i = 0; i + motifLength <= fasta.DNA.Length; i++)
                {
                    if (regex.IsMatch(fasta.DNA.Substring(i, motifLength)))
                    {
                        positions.Add(i + 1); // report 1-based positions
                    }
                }

                // Only proteins with at least one hit are recorded.
                if (positions.Count != 0)
                {
                    tuples.Add((protein, positions.ToArray()));
                }
            }

            foreach ((string id, int[] hits) in tuples)
            {
                Console.WriteLine(id);
                foreach (int hit in hits)
                {
                    Console.Write(hit + " ");
                }
                Console.WriteLine();
            }

            Console.ReadLine();
        }
示例#13
0
        /// <summary>
        /// Builds the variant combinations for a single match. Skips the leading entries of
        /// <c>seq.mVariants</c> whose position is before <c>r.Index</c>, then advances over the
        /// entries whose position is before <c>r.Index + r.Length</c>, and passes the resulting
        /// index range to the (seq, i1, i2) overload.
        /// </summary>
        /// <param name="seq">Sequence whose variant list is scanned (presumably ordered by position — verify).</param>
        /// <param name="r">Match whose index and length delimit the window.</param>
        /// <returns>Whatever the index-range overload returns for the located range.</returns>
        private List <List <Variant> > GetMutations(Fasta seq, WregexResult r)
        {
            // First variant at or after the start of the match.
            int first = 0;
            for (; first < seq.mVariants.Count; first++)
            {
                if ((int)seq.mVariants[first].pos >= r.Index)
                {
                    break;
                }
            }

            // First variant at or after the end of the match (exclusive upper bound).
            int end = first;
            for (; end < seq.mVariants.Count; end++)
            {
                if ((int)seq.mVariants[end].pos >= (r.Index + r.Length))
                {
                    break;
                }
            }

            return GetMutations(seq, first, end - 1);
        }
示例#14
0
        // Searches the local BLASTDB reference FASTA by accession.
        // Returns the first entry whose key, cut at its first '.', equals the accession.
        // When nothing matches, an error text is appended to message and a pair of
        // empty strings is returned.
        public static KeyValuePair <string, string> GetCoronaReference(string accession, ref string message)
        {
            var localReference = Path.Combine(
                FluGASv25.Proc.Flow.CommonFlow.GetBlastReferenceDir,
                CommonFlow.covBaseName + FnaFooter);

            var fastaDic = Fasta.FastaFile2Dic(localReference);

            // Walk the lazy query once and stop at the first hit; an Any() followed by
            // First() would run the filter over the dictionary twice.
            foreach (var candidate in fastaDic.Where(s => s.Key.Split(".").First() == accession))
            {
                return(candidate); // found
            }

            // error...
            message += "not found accession, " + accession;
            return(new KeyValuePair <string, string>(string.Empty, string.Empty));
        }
示例#15
0
        /// <summary>
        /// Splits <c>seq.mVariants</c> into consecutive runs in which neighbouring positions differ
        /// by at most <paramref name="max"/>, and collects the combinations produced by the
        /// (seq, i1, i2) overload for every run.
        /// </summary>
        /// <param name="seq">Sequence whose variant list is grouped.</param>
        /// <param name="max">Largest allowed gap between the positions of two neighbouring variants in a run.</param>
        /// <returns>The concatenated results of all runs; empty when there are no variants.</returns>
        private List <List <Variant> > GetMutations(Fasta seq, int max)
        {
            var groups   = new List <List <Variant> >();
            int runStart = 0;
            int runEnd   = 0;

            while (runStart < seq.mVariants.Count)
            {
                // A run always contains at least one variant ...
                runEnd++;
                // ... and grows while the next variant is close enough to its predecessor.
                while (runEnd < seq.mVariants.Count &&
                       (seq.mVariants[runEnd].pos - seq.mVariants[runEnd - 1].pos) <= (ulong)max)
                {
                    runEnd++;
                }

                groups.AddRange(GetMutations(seq, runStart, runEnd - 1));
                runStart = runEnd;
            }

            return groups;
        }
示例#16
0
        /// <summary>
        /// Loads protein entries from a FASTA-like text file into <c>mSeqs</c>.
        /// A line starting with '&gt;' opens a new entry whose id is the text between '&gt;' and
        /// the first '|'. A line starting with "NP_" is parsed as a <see cref="Variant"/> of the
        /// current entry (duplicates are ignored). Any other line is appended to its sequence.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <exception cref="ApplicationException">
        /// The file does not start with a header line, or an entry has no sequence.
        /// </exception>
        private void LoadFasta(string path)
        {
            char[]  sep = new char[] { '|' };
            UnixCfg rd  = new UnixCfg(path);

            try
            {
                string line = rd.ReadUnixLine();
                // The length check keeps an empty first line from failing on line[0].
                if (line == null || line.Length == 0 || line[0] != '>')
                {
                    throw new ApplicationException("FASTA header not found");
                }
                Fasta f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");

                do
                {
                    line = rd.ReadUnixLine();
                    if (line == null || (line.Length > 0 && line[0] == '>'))    // EOF or next element
                    {
                        if (f.mSequence.Length == 0)
                        {
                            throw new ApplicationException("FASTA sequence not found");
                        }
                        mSeqs.Add(f);
                        if (line != null)
                        {
                            f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
                        }
                    }
                    else if (line.StartsWith("NP_", StringComparison.Ordinal))    // Variant
                    {
                        Variant v = new Variant(line);
                        if (!f.mVariants.Contains(v))
                        {
                            f.mVariants.Add(v);
                        }
                    }
                    else    // Sequence (an empty line appends nothing)
                    {
                        f.mSequence += line;
                    }
                } while (line != null);
            }
            finally
            {
                // Release the reader even when one of the format errors above is thrown.
                rd.Close();
            }
        }
示例#17
0
        /// <summary>
        /// Loads protein entries from a UniProt XML file into <c>mSeqs</c>, dumping each one as
        /// it is added. Entries without sequence text are ignored. When <paramref name="list"/>
        /// is null every remaining entry is loaded. Otherwise an entry is loaded only if at least
        /// one of its sequence-variant features has an id present in <paramref name="list"/>, and
        /// the variants stored under those ids are attached to it.
        /// </summary>
        /// <param name="path">Path of the XML file.</param>
        /// <param name="list">Optional map from feature id to the variants to attach; may be null.</param>
        private void LoadXml(string path, SortedList <string, List <Variant> > list)
        {
            UniprotXml xml = new UniprotXml(path);

            try
            {
                EhuBio.Database.Ebi.Xml.entry e;

                while ((e = xml.ReadEntry()) != null)
                {
                    if (e.sequence == null || e.sequence.Value == null || e.sequence.Value.Length == 0)
                    {
                        continue;
                    }

                    // Stays null when the entry has to be skipped.
                    Fasta f = null;

                    if (list == null)
                    {
                        f = new Fasta(Fasta.Type.Protein, e.accession[0], e.sequence.Value);
                    }
                    else if (e.feature != null)    // an entry without features has nothing to match
                    {
                        foreach (EhuBio.Database.Ebi.Xml.featureType feature in e.feature)
                        {
                            if (feature.type != EhuBio.Database.Ebi.Xml.featureTypeType.sequencevariant || feature.id == null)
                            {
                                continue;
                            }

                            // Single lookup instead of ContainsKey followed by the indexer.
                            List <Variant> known;
                            if (!list.TryGetValue(feature.id, out known))
                            {
                                continue;
                            }

                            // Create the entry lazily, on the first matching feature.
                            if (f == null)
                            {
                                f = new Fasta(Fasta.Type.Protein, e.accession[0], e.sequence.Value);
                            }
                            f.mVariants.AddRange(known);
                        }
                    }

                    if (f == null)
                    {
                        continue;
                    }
                    f.Dump(true);
                    mSeqs.Add(f);
                }
            }
            finally
            {
                // Release the reader even when parsing fails part-way through.
                xml.Close();
            }
        }
示例#18
0
        /// <summary>
        /// Loads the FASTA data set from the "Rosalind" desktop folder and prints, one per line,
        /// every string returned by <c>ReadFrames</c> for the DNA of the first record.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string dataDir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory) + @"\Rosalind";

            // Every matching file is read but only the content of the last one is kept;
            // the folder is expected to contain a single data set.
            string[] content = { "" };
            foreach (string path in Directory.GetFiles(dataDir, "*.txt"))
            {
                content = File.ReadAllLines(path);
            }

            Fasta first = Fasta.DecodeFasta(content)[0];

            foreach (string protein in ReadFrames(first.DNA))
            {
                Console.WriteLine(protein);
            }

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
示例#19
0
        /// <summary>
        /// Loads the FASTA data set from the "Rosalind" desktop folder and calls
        /// <c>FindDNAReversePalindrome</c> with the DNA of the first record and the arguments 4 and 12.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string dataDir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory) + @"\Rosalind";

            // Every matching file is read but only the content of the last one is kept;
            // the folder is expected to contain a single data set.
            string[] content = { "" };
            foreach (string path in Directory.GetFiles(dataDir, "*.txt"))
            {
                content = File.ReadAllLines(path);
            }

            Fasta first = Fasta.DecodeFasta(content)[0];

            // NOTE(review): the returned array is never written out here; only an empty line is
            // printed below. Confirm whether the callee prints the result itself.
            int?[][] palindromes = FindDNAReversePalindrome(first.DNA, 4, 12);

            Console.WriteLine();

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
        /// <summary>
        /// Solves the GC problem (http://rosalind.info/problems/gc/): finds the entry with the
        /// highest GC ratio and saves its label, followed on the next line by the ratio as a
        /// percentage rounded to six decimals.
        /// </summary>
        private static void SolveGc()
        {
            var fasta             = new Fasta();
            var entries           = fasta.ReadEntries(@"Data\GC\rosalind_gc.txt");
            var highestGCRatio    = 0m;
            var highestFastaLabel = "";

            foreach (var entry in entries)
            {
                var ratio = entry.Dna.CalculateGcRatio();
                if (ratio > highestGCRatio)
                {
                    highestGCRatio    = ratio;
                    highestFastaLabel = entry.Label;
                }
            }

            highestGCRatio = Math.Round(highestGCRatio * 100, 6);

            // Format with the invariant culture so the decimal separator is always '.'.
            // Replacing ',' with '.' after the fact would also rewrite commas inside the label.
            var result = String.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0}\n{1}%",
                highestFastaLabel,
                highestGCRatio);

            SaveResult(@"Results\rosalind_gc_results.txt", result);
        }
示例#21
0
文件: Main.cs 项目: akrogp/EhuBio
        /// <summary>
        /// Collects all regex results for a sequence: the matches on the unmodified sequence,
        /// a "lost" result for every variant combination inside a match that yields no result,
        /// and the results found on mutated sequences that are not already present.
        /// </summary>
        /// <param name="seq">Sequence (with its variants) to analyse.</param>
        /// <returns>The collected results, or null when there are none.</returns>
        private List<WregexResult> GetTotalVariantsResults( Fasta seq )
        {
            List<WregexResult> ret = new List<WregexResult>();

            // Original (without mutations)
            WregexResult[] orig = mRegex.Search(seq.mSequence, seq.ID+"-orig").ToArray();
            ret.AddRange( orig );

            // Lost: a combination of variants inside the match that produces no result
            foreach( WregexResult r in orig )
            {
                List<List<Variant>> inMatch = GetMutations( seq, r );
                foreach( List<Variant> m in inMatch )
                {
                    bool stillMatches = GetVariantsResults(seq.ID, r.Match, r.Index, m).Count != 0;
                    if( !stillMatches )
                    {
                        // NOTE(review): this is a copy only if WregexResult is a value type — confirm.
                        WregexResult r2 = r;
                        r2.Type = ResultType.Lost;
                        r2.Entry = r2.Entry.Replace("-orig","-lost") + GetID(m, r, true);
                        ret.Add( r2 );
                    }
                }
            }

            // Mutations
            WregexResult[] mut = GetVariantsResults( seq.ID, seq.mSequence, GetMutations( seq, mRegex.MaxLength ) ).ToArray();
            for( int i = 0; i < mut.Length; i++ )
            {
                // Filter duplicates: stop at the first collected result with the same index and match
                int k = 0;
                while( k < ret.Count && !(ret[k].Index == mut[i].Index && ret[k].Match.Equals(mut[i].Match)) )
                {
                    k++;
                }
                if( k < ret.Count )
                {
                    continue;
                }

                // Assign names; results without an id are dropped
                string id = GetID ( seq.mVariants, mut[i] );
                if( id.Length == 0 )
                {
                    continue;
                }
                mut[i].Entry += id;

                // Gained: no original match starts at the same index
                int j = 0;
                while( j < orig.Length && mut[i].Index != orig[j].Index )
                {
                    j++;
                }
                if( j == orig.Length )
                {
                    mut[i].Type = ResultType.Gained;
                }
                ret.Add( mut[i] );
            }

            return ret.Count == 0 ? null : ret;
        }
示例#22
0
 /// <summary>
 /// Calls <c>Read</c> on a reader over empty input. Nothing is asserted, so the test
 /// only verifies that the call completes without throwing.
 /// </summary>
 public void ReadReturnsFastaFormat()
 {
     var emptyInput = new StringReader("");
     var reader     = new FastaReader(emptyInput);

     Fasta record = reader.Read();
 }
示例#23
0
 /// <summary>
 /// Writes a FASTA entry by forwarding its sequence and description to the
 /// two-argument <c>Write</c> overload.
 /// </summary>
 /// <param name="fasta">Entry to write.</param>
 public void Write(Fasta fasta)
 {
     var sequence    = fasta.Sequence;
     var description = fasta.Description;

     Write(sequence, description);
 }
示例#24
0
文件: Main.cs 项目: akrogp/EhuBio
        /// <summary>
        /// Splits <c>seq.mVariants</c> into consecutive runs in which neighbouring positions differ
        /// by at most <paramref name="max"/>, and gathers the combinations returned by the
        /// (seq, i1, i2) overload for each run.
        /// </summary>
        /// <param name="seq">Sequence whose variant list is grouped.</param>
        /// <param name="max">Largest allowed gap between the positions of two neighbouring variants in a run.</param>
        /// <returns>The concatenated results of all runs; empty when there are no variants.</returns>
        private List<List<Variant>> GetMutations( Fasta seq, int max )
        {
            List<List<Variant>> groups = new List<List<Variant>>();

            // 'begin' is the first index of the current run, 'next' the index just past it.
            for( int begin = 0, next = 0; begin < seq.mVariants.Count; begin = next )
            {
                // Take one variant unconditionally, then extend while the gap stays within max.
                do
                {
                    next++;
                }
                while( next < seq.mVariants.Count && (seq.mVariants[next].pos-seq.mVariants[next-1].pos) <= (ulong)max );

                groups.AddRange( GetMutations(seq, begin, next-1) );
            }

            return groups;
        }
示例#25
0
        /// <summary>
        /// Collects all regex results for a sequence: the matches on the unmodified sequence,
        /// a "lost" result for every variant combination inside a match that yields no result,
        /// and the results found on mutated sequences that are not already present.
        /// </summary>
        /// <param name="seq">Sequence (with its variants) to analyse.</param>
        /// <returns>The collected results, or null when there are none.</returns>
        private List <WregexResult> GetTotalVariantsResults(Fasta seq)
        {
            List <WregexResult> results = new List <WregexResult>();

            // Original (without mutations)
            WregexResult[] originals = mRegex.Search(seq.mSequence, seq.ID + "-orig").ToArray();
            results.AddRange(originals);

            // Lost: variant combinations inside a match for which no result remains
            foreach (WregexResult original in originals)
            {
                List <List <Variant> > inMatch = GetMutations(seq, original);
                foreach (List <Variant> combination in inMatch)
                {
                    if (GetVariantsResults(seq.ID, original.Match, original.Index, combination).Count != 0)
                    {
                        continue;
                    }

                    // NOTE(review): this is a copy only if WregexResult is a value type — confirm.
                    WregexResult lost = original;
                    lost.Type  = ResultType.Lost;
                    lost.Entry = lost.Entry.Replace("-orig", "-lost") + GetID(combination, original, true);
                    results.Add(lost);
                }
            }

            // Mutations
            List <List <Variant> > allMutations = GetMutations(seq, mRegex.MaxLength);
            WregexResult[]         mutated      = GetVariantsResults(seq.ID, seq.mSequence, allMutations).ToArray();

            for (int m = 0; m < mutated.Length; m++)
            {
                // Filter duplicates: same index and same match text as something already collected
                bool alreadyCollected = false;
                foreach (WregexResult known in results)
                {
                    if (known.Index == mutated[m].Index && known.Match.Equals(mutated[m].Match))
                    {
                        alreadyCollected = true;
                        break;
                    }
                }
                if (alreadyCollected)
                {
                    continue;
                }

                // Assign names; results without an id are dropped
                string id = GetID(seq.mVariants, mutated[m]);
                if (id.Length == 0)
                {
                    continue;
                }
                mutated[m].Entry += id;

                // Gained: no original match starts at the same index
                bool sharesOriginalIndex = false;
                for (int o = 0; o < originals.Length && !sharesOriginalIndex; o++)
                {
                    sharesOriginalIndex = mutated[m].Index == originals[o].Index;
                }
                if (!sharesOriginalIndex)
                {
                    mutated[m].Type = ResultType.Gained;
                }

                results.Add(mutated[m]);
            }

            if (results.Count == 0)
            {
                return null;
            }
            return results;
        }
示例#26
0
        /// <summary>
        /// Enumerates the non-empty subsets of <c>seq.mVariants[i1..i2]</c>. Within a subset only
        /// the first variant seen at a given position is kept, and a subset is dropped when an
        /// earlier one of the same size already contains all of its variants.
        /// </summary>
        /// <param name="seq">Sequence providing the variant list.</param>
        /// <param name="i1">Index of the first variant in the range.</param>
        /// <param name="i2">Index of the last variant in the range (inclusive).</param>
        /// <returns>The distinct combinations; empty when the range is empty.</returns>
        private List <List <Variant> > GetMutations(Fasta seq, int i1, int i2)
        {
            List <List <Variant> > subsets = new List <List <Variant> >();
            int span = i2 - i1 + 1;

            if (span <= 0)
            {
                return subsets;
            }

            // Each bit of 'mask' selects one variant of the range.
            // NOTE(review): 1 << span overflows int for ranges of 31 or more variants.
            int maskCount = 1 << span;

            for (int mask = 1; mask < maskCount; mask++)
            {
                List <Variant> picked = new List <Variant>();

                // Visit the bits from least significant up to the highest set bit.
                for (int bit = 0; (mask >> bit) != 0; bit++)
                {
                    if (((mask >> bit) & 1) == 0)
                    {
                        continue;
                    }

                    Variant candidate = seq.mVariants[i1 + bit];

                    // Keep at most one variant per position.
                    bool positionTaken = false;
                    foreach (Variant chosen in picked)
                    {
                        if (chosen.pos == candidate.pos)
                        {
                            positionTaken = true;
                            break;
                        }
                    }
                    if (!positionTaken)
                    {
                        picked.Add(candidate);
                    }
                }

                // Skip the subset when an equally sized earlier one already holds all its variants.
                bool seenBefore = subsets.Exists(
                    earlier => earlier.Count == picked.Count && picked.TrueForAll(v => earlier.Contains(v)));
                if (!seenBefore)
                {
                    subsets.Add(picked);
                }
            }

            return subsets;
        }
示例#27
0
 /// <summary>
 /// Creates event data that carries the given FASTA entry.
 /// </summary>
 /// <param name="fasta">Entry stored in the <c>Fasta</c> member.</param>
 public FastaEvent(Fasta fasta)
 {
     this.Fasta = fasta;
 }
示例#28
0
 /// <summary>
 /// Convenience overload that calls the (seq, max) overload with a fixed limit of 20000.
 /// </summary>
 /// <param name="seq">Sequence whose variants are combined.</param>
 /// <returns>The result of the (seq, max) overload.</returns>
 private List <List <Variant> > GetMutations(Fasta seq)
 {
     const int defaultMax = 20000;

     return GetMutations(seq, defaultMax);
 }
示例#29
0
文件: Main.cs 项目: akrogp/EhuBio
        /// <summary>
        /// Builds the variant combinations for a single match: finds the index range of the
        /// entries in <c>seq.mVariants</c> that follow the leading entries positioned before
        /// <c>r.Index</c> and precede the first entry at or past <c>r.Index + r.Length</c>,
        /// then delegates to the (seq, i1, i2) overload.
        /// </summary>
        /// <param name="seq">Sequence whose variant list is scanned (presumably ordered by position — verify).</param>
        /// <param name="r">Match whose index and length delimit the window.</param>
        /// <returns>Whatever the index-range overload returns for the located range.</returns>
        private List<List<Variant>> GetMutations( Fasta seq, WregexResult r )
        {
            // Skip the variants located before the match.
            int first = 0;
            while( first < seq.mVariants.Count )
            {
                if( (int)seq.mVariants[first].pos >= r.Index )
                {
                    break;
                }
                first++;
            }

            // Advance to the first variant at or beyond the end of the match.
            int beyond = first;
            while( beyond < seq.mVariants.Count )
            {
                if( (int)seq.mVariants[beyond].pos >= (r.Index+r.Length) )
                {
                    break;
                }
                beyond++;
            }

            return GetMutations( seq, first, beyond-1 );
        }
示例#30
0
文件: Main.cs 项目: akrogp/EhuBio
 /// <summary>
 /// Loads protein entries from a UniProt XML file into <c>mSeqs</c>, dumping each one as it
 /// is added. Entries without sequence text are ignored. When <paramref name="list"/> is null
 /// every remaining entry is loaded. Otherwise an entry is loaded only if at least one of its
 /// sequence-variant features has an id present in <paramref name="list"/>, and the variants
 /// stored under those ids are attached to it.
 /// </summary>
 /// <param name="path">Path of the XML file.</param>
 /// <param name="list">Optional map from feature id to the variants to attach; may be null.</param>
 private void LoadXml( string path, SortedList<string,List<Variant>> list )
 {
     UniprotXml xml = new UniprotXml( path );
     try
     {
         EhuBio.Database.Ebi.Xml.entry e;
         while( (e=xml.ReadEntry()) != null )
         {
             if( e.sequence == null || e.sequence.Value == null || e.sequence.Value.Length == 0 )
             {
                 continue;
             }

             // Stays null when the entry has to be skipped.
             Fasta f = null;
             if( list == null )
             {
                 f = new Fasta( Fasta.Type.Protein, e.accession[0], e.sequence.Value );
             }
             else if( e.feature != null )    // an entry without features has nothing to match
             {
                 foreach( EhuBio.Database.Ebi.Xml.featureType feature in e.feature )
                 {
                     if( feature.type != EhuBio.Database.Ebi.Xml.featureTypeType.sequencevariant || feature.id == null )
                     {
                         continue;
                     }

                     // Single lookup instead of ContainsKey followed by the indexer.
                     List<Variant> known;
                     if( !list.TryGetValue(feature.id, out known) )
                     {
                         continue;
                     }

                     // Create the entry lazily, on the first matching feature.
                     if( f == null )
                     {
                         f = new Fasta( Fasta.Type.Protein, e.accession[0], e.sequence.Value );
                     }
                     f.mVariants.AddRange( known );
                 }
             }

             if( f == null )
             {
                 continue;
             }
             f.Dump( true );
             mSeqs.Add( f );
         }
     }
     finally
     {
         // Release the reader even when parsing fails part-way through.
         xml.Close();
     }
 }
示例#31
0
文件: Main.cs 项目: akrogp/EhuBio
        /// <summary>
        /// Enumerates the non-empty subsets of <c>seq.mVariants[i1..i2]</c>. Within a subset only
        /// the first variant seen at a given position is kept, and a subset is dropped when an
        /// earlier one of the same size already contains all of its variants.
        /// </summary>
        /// <param name="seq">Sequence providing the variant list.</param>
        /// <param name="i1">Index of the first variant in the range.</param>
        /// <param name="i2">Index of the last variant in the range (inclusive).</param>
        /// <returns>The distinct combinations; empty when the range is empty.</returns>
        private List<List<Variant>> GetMutations( Fasta seq, int i1, int i2 )
        {
            List<List<Variant>> result = new List<List<Variant>>();
            int len = i2 - i1 + 1;
            if( len <= 0 )
            {
                return result;
            }

            // NOTE(review): 1 << len overflows int for ranges of 31 or more variants.
            int combinations = 1 << len;
            for( int i = 1; i < combinations; i++ )
            {
                // Binary digits of i, most significant first; the last digit selects variant i1.
                string bits = Convert.ToString(i,2);
                List<Variant> comb = new List<Variant>();

                for( int k = bits.Length-1; k >= 0; k-- )
                {
                    if( bits[k] != '1' )
                    {
                        continue;
                    }
                    int offset = bits.Length-1-k;

                    // Keep at most one variant per position.
                    bool samePos = false;
                    foreach( Variant v in comb )
                    {
                        if( v.pos == seq.mVariants[i1+offset].pos )
                        {
                            samePos = true;
                            break;
                        }
                    }
                    if( !samePos )
                    {
                        comb.Add( seq.mVariants[i1+offset] );
                    }
                }

                // Look for an earlier combination of the same size that holds every variant of comb.
                bool known = false;
                for( int n = 0; n < result.Count && !known; n++ )
                {
                    if( result[n].Count != comb.Count )
                    {
                        continue;
                    }
                    known = true;
                    foreach( Variant v in comb )
                    {
                        if( !result[n].Contains(v) )
                        {
                            known = false;
                            break;
                        }
                    }
                }
                if( !known )
                {
                    result.Add( comb );
                }
            }

            return result;
        }
示例#32
0
文件: Main.cs 项目: akrogp/EhuBio
 /// <summary>
 /// Loads protein entries from a FASTA-like text file into <c>mSeqs</c>.
 /// A line starting with '&gt;' opens a new entry whose id is the text between '&gt;' and the
 /// first '|'. A line starting with "NP_" is parsed as a <see cref="Variant"/> of the current
 /// entry (duplicates are ignored). Any other line is appended to its sequence.
 /// </summary>
 /// <param name="path">Path of the file to read.</param>
 /// <exception cref="ApplicationException">
 /// The file does not start with a header line, or an entry has no sequence.
 /// </exception>
 private void LoadFasta( string path )
 {
     char[] sep = new char[]{'|'};
     UnixCfg rd = new UnixCfg( path );
     try
     {
         string line = rd.ReadUnixLine();
         // The length check keeps an empty first line from failing on line[0].
         if( line == null || line.Length == 0 || line[0] != '>' )
         {
             throw new ApplicationException( "FASTA header not found" );
         }
         Fasta f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
         do
         {
             line = rd.ReadUnixLine();
             if( line == null || (line.Length > 0 && line[0] == '>') )    // EOF or next element
             {
                 if( f.mSequence.Length == 0 )
                 {
                     throw new ApplicationException( "FASTA sequence not found" );
                 }
                 mSeqs.Add(f);
                 if( line != null )
                 {
                     f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
                 }
             }
             else if( line.StartsWith("NP_", StringComparison.Ordinal) )    // Variant
             {
                 Variant v = new Variant(line);
                 if( !f.mVariants.Contains(v) )
                 {
                     f.mVariants.Add(v);
                 }
             }
             else    // Sequence (an empty line appends nothing)
             {
                 f.mSequence += line;
             }
         } while( line != null );
     }
     finally
     {
         // Release the reader even when one of the format errors above is thrown.
         rd.Close();
     }
 }
示例#33
0
文件: Main.cs 项目: akrogp/EhuBio
 /// <summary>
 /// Convenience overload that calls the (seq, max) overload with a fixed limit of 20000.
 /// </summary>
 /// <param name="seq">Sequence whose variants are combined.</param>
 /// <returns>The result of the (seq, max) overload.</returns>
 private List<List<Variant>> GetMutations( Fasta seq )
 {
     const int fixedMax = 20000;

     return GetMutations( seq, fixedMax );
 }
示例#34
0
 /// <summary>
 /// Calls <c>Fasta.Read</c> on a fixed local file. Nothing is asserted, so the test only
 /// verifies that the call completes without throwing.
 /// </summary>
 public void ReadTest()
 {
     // NOTE(review): absolute, machine-specific path — the test depends on this file existing.
     const string fastaPath = @"F:\JICWork\miRNA_no_akr.fasta";

     var result = Fasta.Read(fastaPath);
 }
示例#35
0
 /// <summary>
 /// Creates event data that carries the given FASTA entry.
 /// </summary>
 /// <param name="fasta">Entry stored in the <c>Fasta</c> member.</param>
 public FastaEvent(Fasta fasta)
 {
     this.Fasta = fasta;
 }
示例#36
0
文件: Main.cs 项目: akrogp/EhuBio
        // Reads a gzip-compressed, comma-separated file line by line (the first line is
        // skipped) and collects variants per protein into Fasta entries, which are dumped
        // at the end.
        // NOTE(review): this method looks unfinished. It contains a dangling "server." member
        // access and uses seq, sreq, sres, ssrv, preq, pres and psrv, none of which are
        // declared in this method — confirm against the rest of the file before relying on it.
        // NOTE(review): the StreamReader is never closed or disposed in this method.
        public static void runCosmic( string file )
        {
            StreamReader rd = new StreamReader(new GZipStream(new FileStream(file,FileMode.Open), CompressionMode.Decompress));
            string line;
            char[] sep1 = new char[]{','};
            char[] sep2 = new char[]{':'};
            string[] fields, fields2;
            String mut, prot;
            Variant v;
            Fasta f;
            // Ids that have already been processed.
            List<string> ids = new List<string>();
            // Cache of Fasta entries, keyed by protein id.
            SortedList<string,Fasta> fasta = new SortedList<string, Fasta>();
            bool first = true;
            //int count = 1;
            while( (line=rd.ReadLine()) != null ) {
            // Skip the first line of the file.
            if( first ) {
                first = false;
                continue;
            }
            fields = line.Split(sep1);
            // Rows with an empty fifth column are ignored.
            if( fields[4].Length == 0 )
                continue;
            if( fasta.ContainsKey(fields[4]) )
                f = fasta[fields[4]];
            else {
                WSDBFetchServerService server = new WSDBFetchServerService();
                // NOTE(review): incomplete statement — as written, the next two lines parse as "server.f = ...".
                server.
                f = new Fasta(Fasta.Type.Protein,fields[4],seq);
            }

            v = new Variant();
            // NOTE(review): the check uses fields[4] but the id recorded below is fields[0] — confirm intent.
            if( ids.Contains(fields[4]) )
                continue;
            ids.Add(fields[0]);
            sreq.id = fields[0];
            sres = ssrv.run_eFetch( sreq );
            // Nothing to do when the fetch returned no record.
            if( sres == null || sres.ExchangeSet.Rs == null || sres.ExchangeSet.Rs.Length == 0 )
                continue;
            Console.WriteLine( fields[0] + "..." );
            foreach( string str in sres.ExchangeSet.Rs[0].hgvs ) {
                // Only names starting with "NP_" are processed.
                if( !str.StartsWith("NP_") )
                    continue;
                Console.Write( str + " " );
                v = new Variant();
                v.id = str;
                // Text before ':' is the protein id, text after it describes the mutation.
                fields2 = str.Split(sep2);
                mut = fields2[1]; prot = fields2[0];
                try {
                    // Three characters at offset 2 give the original residue, the last three the
                    // mutated one, and the digits in between the position (stored minus one).
                    // Presumably HGVS protein notation such as "p.Xxx123Yyy" — TODO confirm.
                    v.orig = AminoAcid.Get(mut.Substring(2,3)).Letter;
                    v.mut = AminoAcid.Get(mut.Substring(mut.Length-3,3)).Letter;
                    v.pos = ulong.Parse(mut.Substring(5,mut.Length-8))-1;
                } catch {
                    // Any description that cannot be parsed this way is skipped.
                    Console.WriteLine( "(filtered)" );
                    continue;
                }
                // Reuse the cached entry for this protein, or fetch it and add it to the cache.
                if( fasta.ContainsKey(prot) ) {
                    Console.WriteLine( "(cached)" );
                    f = fasta[prot];
                } else {
                    preq.db = "protein";
                    preq.id = prot;
                    pres = psrv.run_eFetch( preq );
                    f = new Fasta(Fasta.Type.Protein,prot+"|"+pres.GBSet[0].GBSeq_definition,pres.GBSet[0].GBSeq_sequence);
                    fasta.Add( prot, f );
                    Console.WriteLine( "(downloaded)" );
                }
                f.mVariants.Add( v );
            }
            /*if( --count == 0 )
                break;*/
            }

            // Dump every collected entry.
            foreach( Fasta fas in fasta.Values )
            fas.Dump( true );
        }
示例#37
0
 // Raises the OnInvalidHeader event for the given FASTA entry.
 // The event delegate is first copied into a local variable so that the null
 // check and the invocation operate on the same delegate instance, even if a
 // subscriber is removed in between. When nobody is subscribed (the copy is
 // null) nothing happens; otherwise every registered handler is called with
 // this object as the sender and a FastaEvent wrapping the entry.
 private void InvalidHeader(Fasta fasta)
 {
     var listeners = OnInvalidHeader;
     if (listeners == null)
     {
         return;
     }

     listeners.Invoke(this, new FastaEvent(fasta));
 }
示例#38
0
        /// <summary>
        /// Reads a gzip-compressed, comma-separated file line by line, queries the
        /// SNP eFetch service once per distinct identifier (first column), and
        /// attaches every HGVS entry starting with "NP_" to the matching protein
        /// record. Protein records are fetched from the sequence eFetch service the
        /// first time an accession is seen and cached afterwards. Once the whole
        /// file has been processed, <c>Dump(true)</c> is called on every record.
        /// </summary>
        /// <param name="file">Path of the gzip-compressed input file.</param>
        public static void runEnsembl(string file)
        {
            char[] sep1 = new char[] { ',' };
            char[] sep2 = new char[] { ':' };

            // Identifiers already submitted to the SNP service; a set keeps the
            // duplicate check constant-time however large the input is.
            HashSet<string> ids = new HashSet<string>();
            SortedList<string, Fasta> fasta = new SortedList<string, Fasta>();

            eFetchSnpService ssrv = new eFetchSnpService();
            EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest sreq = new EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest();
            EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchResult sres;

            eFetchSequenceService psrv = new eFetchSequenceService();
            EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest preq = new EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest();
            EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchResult pres;

            // Each stream gets its own using so the file handle is released even
            // if constructing one of the wrapping streams throws.
            using (FileStream raw = new FileStream(file, FileMode.Open, FileAccess.Read))
            using (GZipStream unzipped = new GZipStream(raw, CompressionMode.Decompress))
            using (StreamReader rd = new StreamReader(unzipped))
            {
                string line;
                while ((line = rd.ReadLine()) != null)
                {
                    string[] fields = line.Split(sep1);

                    // Rows with fewer than five columns (e.g. blank lines) cannot
                    // be evaluated and are skipped instead of crashing the run.
                    // A row is kept only when column 2 contains '/', and column 3
                    // is non-empty and equal to column 4.
                    if (fields.Length < 5
                        || !fields[2].Contains("/")
                        || fields[3].Length == 0
                        || fields[3] != fields[4])
                    {
                        continue;
                    }

                    // Add returns false when the identifier was already processed.
                    if (!ids.Add(fields[0]))
                    {
                        continue;
                    }

                    sreq.id = fields[0];
                    sres = ssrv.run_eFetch(sreq);
                    if (sres == null || sres.ExchangeSet.Rs == null || sres.ExchangeSet.Rs.Length == 0)
                    {
                        continue;
                    }

                    Console.WriteLine(fields[0] + "...");
                    foreach (string str in sres.ExchangeSet.Rs[0].hgvs)
                    {
                        if (!str.StartsWith("NP_", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        Console.Write(str + " ");

                        // Expected layout is "<accession>:<mutation>"; anything
                        // without the separator is reported as filtered.
                        string[] parts = str.Split(sep2);
                        if (parts.Length < 2)
                        {
                            Console.WriteLine("(filtered)");
                            continue;
                        }

                        string prot = parts[0];
                        string mut = parts[1];

                        Variant v = new Variant();
                        v.id = str;
                        try
                        {
                            // NOTE(review): the offsets assume a mutation such as
                            // "p.Ala123Val" (2-char prefix, 3-letter residue,
                            // number, 3-letter residue) - confirm against the data.
                            v.orig = AminoAcid.Get(mut.Substring(2, 3)).Letter;
                            v.mut = AminoAcid.Get(mut.Substring(mut.Length - 3, 3)).Letter;

                            // The parsed number is stored minus one; checked makes
                            // a parsed value of 0 raise an overflow (and so be
                            // filtered) instead of wrapping to ulong.MaxValue.
                            v.pos = checked(ulong.Parse(mut.Substring(5, mut.Length - 8), System.Globalization.CultureInfo.InvariantCulture) - 1);
                        }
                        catch
                        {
                            // Deliberate best effort: any entry that does not fit
                            // the expected layout is skipped.
                            Console.WriteLine("(filtered)");
                            continue;
                        }

                        Fasta f;
                        if (fasta.TryGetValue(prot, out f))
                        {
                            Console.WriteLine("(cached)");
                        }
                        else
                        {
                            preq.db = "protein";
                            preq.id = prot;
                            pres = psrv.run_eFetch(preq);

                            // Mirror the null handling applied to the SNP result:
                            // without a sequence there is no record to attach to.
                            if (pres == null || pres.GBSet == null)
                            {
                                Console.WriteLine("(download failed)");
                                continue;
                            }

                            f = new Fasta(Fasta.Type.Protein, prot + "|" + pres.GBSet[0].GBSeq_definition, pres.GBSet[0].GBSeq_sequence);
                            fasta.Add(prot, f);
                            Console.WriteLine("(downloaded)");
                        }

                        f.mVariants.Add(v);
                    }
                }
            }

            foreach (Fasta fas in fasta.Values)
            {
                fas.Dump(true);
            }
        }