/// <summary>
/// Loads the FASTA data set from the "Rosalind" folder on the desktop, prints the
/// result of <c>LongestCommonSubstring</c> over its DNA strings and reports how
/// long that call took.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);

    // Assumes only one txt file in this directory - the data set
    string[] files = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");
    if (files.Length == 0)
    {
        // Indexing files[0] on an empty result used to end in an IndexOutOfRangeException.
        Console.WriteLine("No *.txt data set found in the Rosalind folder.");
        Console.ReadLine();
        return;
    }

    string[] lines = File.ReadAllLines(files[0]);
    List<Fasta> fastas = Fasta.DecodeFasta(lines);
    List<string> dnaOnly = fastas.Select(f => f.DNA).ToList();

    // Only the search itself is timed, not the file parsing above.
    Stopwatch itime = Stopwatch.StartNew();
    Console.WriteLine(LongestCommonSubstring(dnaOnly));
    itime.Stop();

    Console.WriteLine("Took: {0}ms", itime.ElapsedMilliseconds);
    Console.ReadLine();
}
/// <summary>
/// Event handler that reports an invalid header: writes a notice to the console
/// followed by the description of the record carried by the event.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data holding the offending record.</param>
static void databaseMaker_OnInvalidHeader(object sender, FastaEvent e)
{
    Fasta rejected = e.Fasta;

    Console.WriteLine("\nInvalid Header!");
    Console.WriteLine(rejected.Description);
}
// A header line followed by one sequence line must expose that sequence through Dna.
public void PopulateATCGDnaFastaFormat()
{
    StringReader source = new StringReader(">DNA\nATCG");
    FastaReader reader = new FastaReader(source);

    Fasta parsed = reader.Read();

    Assert.AreEqual("ATCG", parsed.Dna);
}
// A lone header line must be exposed through Header without the leading '>'.
public void PopulateDEFHeaderFastaFormat()
{
    StringReader source = new StringReader(">DEF");
    FastaReader reader = new FastaReader(source);

    Fasta parsed = reader.Read();

    Assert.AreEqual("DEF", parsed.Header);
}
// Sequence data spread over several lines must be concatenated into a single Dna string.
public void PopulateMultiLineDnaFastaFormat()
{
    StringReader source = new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT\nATCGGGCTAAT");
    FastaReader reader = new FastaReader(source);

    Fasta parsed = reader.Read();

    Assert.AreEqual("ATCGGGCTAATATCGGGCTAATATCGGGCTAATATCGGGCTAAT", parsed.Dna);
}
// Reads the GC sample data set and checks the number of entries together with the
// label and GC percentage (rounded to 6 decimals) of the entry with the highest ratio.
public void GcContentTests()
{
    // Arrange
    const int expectedEntryCount = 3;
    const decimal expectedGcRatio = 60.919540m;
    const string expectedFastaLabel = "Rosalind_0808";
    var fasta = new Fasta();
    var highestGcRatio = 0m;
    var highestFastaLabel = "";

    // Act
    var entries = fasta.ReadEntries(@"Data\GC\GC_SampleDataSet.txt");
    foreach (var entry in entries)
    {
        var ratio = entry.Dna.CalculateGcRatio();
        if (ratio > highestGcRatio)
        {
            highestGcRatio = ratio;
            highestFastaLabel = entry.Label;
        }
    }

    highestGcRatio = Math.Round(highestGcRatio * 100, 6);

    // Assert
    // Assert.That takes the actual value first and the expectation inside the
    // constraint; the reversed order produced misleading failure messages.
    Assert.That(entries.Count, Is.EqualTo(expectedEntryCount));
    Assert.That(highestGcRatio, Is.EqualTo(expectedGcRatio));
    Assert.That(highestFastaLabel, Is.EqualTo(expectedFastaLabel));
}
/// <summary>
/// Loads the FASTA data set from the "Rosalind" folder on the desktop, builds a
/// ProfileMatrix from its DNA strings and prints the consensus string followed by
/// the matrix.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    string[] files = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

    // Assumes only one txt file in this directory - the data set.
    // Every match is read; the content of the last one is what gets used.
    string[] lines = { "" };
    for (int n = 0; n < files.Length; n++)
    {
        lines = File.ReadAllLines(files[n]);
    }

    List<Fasta> fastaList = Fasta.DecodeFasta(lines);
    string[] strings = fastaList.ConvertAll(record => record.DNA).ToArray();

    ProfileMatrix matrix = new ProfileMatrix(strings);
    Console.WriteLine(matrix.ConsensusString);
    ProfileMatrix.OutputProfileMatrix(matrix);

    Console.ReadLine();
}
/// <summary>
/// Loads the FASTA data set from the "Rosalind" folder on the desktop, treats the
/// first record as the main string and the remaining records as introns, replaces
/// every 'T' with 'U' in all of them and prints the result of
/// <c>TranscribeRNAToProtein</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    string[] files = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

    // Assumes only one txt file in this directory - the data set.
    // Every match is read; the content of the last one is what gets used.
    string[] lines = { "" };
    for (int n = 0; n < files.Length; n++)
    {
        lines = File.ReadAllLines(files[n]);
    }

    List<Fasta> fastaList = Fasta.DecodeFasta(lines);
    string[] dnaStrings = fastaList.Select(record => record.DNA).ToArray();

    // Everything after the first record is an intron.
    string[] introns = dnaStrings
        .Skip(1)
        .Select(intron => intron.Replace('T', 'U'))
        .ToArray();

    string rna = dnaStrings[0].Replace('T', 'U');
    Console.WriteLine(TranscribeRNAToProtein(rna, introns));

    Console.ReadLine();
}
// Raises the OnInvalidHeader event for the given record.
// The event field is copied into a local first, so the null check and the
// invocation act on the same delegate even if the subscriptions change in
// between. When nothing is subscribed the copy is null and the method returns
// without doing anything; otherwise the registered handlers are invoked with
// this instance as the sender and a FastaEvent wrapping the record.
private void InvalidHeader(Fasta fasta)
{
    var subscribers = OnInvalidHeader;
    if (subscribers == null)
    {
        return;
    }

    subscribers(this, new FastaEvent(fasta));
}
/// <summary>
/// Runs <c>Fasta.Run</c> with the single argument "25000000" under
/// <c>Perf.causalProfiling</c>, logging a message before and after.
/// </summary>
static void Main(string[] _)
{
    Dbg.Write("Starting Fasta Profiling");

    // Wrap the workload once, then hand it to the profiler.
    var workload = FuncConvert.FromAction(() => Fasta.Run(new[] { "25000000" }));
    Perf.causalProfiling(10, workload);

    Dbg.Write("Finished Fasta Profiling");
}
// Two records separated by an empty line must be returned by two consecutive Read()
// calls, each with its own header and its sequence lines joined together.
public void HandleMultipleFastaFormatWithEmptyLines()
{
    StringReader source = new StringReader(">DNA\nATCGGGCTAAT\nATCGGGCTAAT\n\n>Number2\nATCGGGCTAAT\nATCGGGCTAAT");
    FastaReader reader = new FastaReader(source);

    Fasta first = reader.Read();
    Assert.AreEqual("DNA", first.Header);
    Assert.AreEqual("ATCGGGCTAATATCGGGCTAAT", first.Dna);

    Fasta second = reader.Read();
    Assert.AreEqual("Number2", second.Header);
    Assert.AreEqual("ATCGGGCTAATATCGGGCTAAT", second.Dna);
}
/// <summary>
/// Reads a list of protein identifiers (one per line) from the data set in the
/// "Rosalind" folder on the desktop, downloads each protein's FASTA record and
/// prints, for every protein containing the motif N[^P][ST][^P], its identifier
/// followed by the 1-based start positions of all (possibly overlapping) matches.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int motifLength = 4;

    string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    string[] files = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

    // Assumes only one txt file in this directory - the data set
    string[] lines = { "" };
    foreach (string file in files)
    {
        lines = File.ReadAllLines(file);
    }

    Regex regex = new Regex(@"N[^P][ST][^P]", RegexOptions.Multiline);
    string url = @"http://www.uniprot.org/uniprot/";
    List<(string id, int[] positions)> tuples = new List<(string id, int[] positions)>();

    foreach (string protein in lines)
    {
        // Blank lines (or a missing data set) would otherwise request ".fasta".
        if (string.IsNullOrWhiteSpace(protein))
        {
            continue;
        }

        string html = MakeGetRequest(url + protein + ".fasta");
        Fasta fasta = Fasta.DecodeFasta(html.Split('\n'))[0];
        string sequence = fasta.DNA;

        // Test every window separately so that overlapping motifs are all found.
        // The bound includes the final window; "i < Length - 4" skipped it.
        List<int> positions = new List<int>();
        for (int i = 0; i + motifLength <= sequence.Length; i++)
        {
            if (regex.IsMatch(sequence.Substring(i, motifLength)))
            {
                positions.Add(i + 1);
            }
        }

        if (positions.Count != 0)
        {
            tuples.Add((protein, positions.ToArray()));
        }
    }

    // Only proteins with at least one hit were stored above.
    foreach ((string id, int[] positions) hit in tuples)
    {
        Console.WriteLine(hit.id);
        foreach (int position in hit.positions)
        {
            Console.Write(position + " ");
        }

        Console.WriteLine();
    }

    Console.ReadLine();
}
// Returns the variant combinations (as produced by the index-range overload) for the
// variants of seq whose position lies inside the match r, i.e. in
// [r.Index, r.Index + r.Length). The scan stops at the first variant at or past the
// end of the match.
private List<List<Variant>> GetMutations(Fasta seq, WregexResult r)
{
    // Skip the variants located before the match.
    int first;
    for (first = 0; first < seq.mVariants.Count; first++)
    {
        if ((int)seq.mVariants[first].pos >= r.Index)
        {
            break;
        }
    }

    // Advance to the first variant at or beyond the end of the match.
    int pastLast;
    for (pastLast = first; pastLast < seq.mVariants.Count; pastLast++)
    {
        if ((int)seq.mVariants[pastLast].pos >= (r.Index + r.Length))
        {
            break;
        }
    }

    return GetMutations(seq, first, pastLast - 1);
}
/// <summary>
/// Searches the local BLAST database reference file by accession.
/// The reference FASTA is loaded from the BLAST reference directory
/// (<c>covBaseName</c> + <c>FnaFooter</c>); an entry matches when the part of
/// its key before the first '.' equals <paramref name="accession"/>.
/// </summary>
/// <param name="accession">Accession to look for, without the part after the first '.'.</param>
/// <param name="message">Receives an appended error text when nothing is found.</param>
/// <returns>
/// The first matching entry, or a pair of empty strings when there is no match.
/// </returns>
public static KeyValuePair<string, string> GetCoronaReference(string accession, ref string message)
{
    var localReference = Path.Combine(
        FluGASv25.Proc.Flow.CommonFlow.GetBlastReferenceDir,
        CommonFlow.covBaseName + FnaFooter);
    var fastaDic = Fasta.FastaFile2Dic(localReference);

    // Single pass with early exit; Any() followed by First() ran the query twice.
    foreach (var entry in fastaDic)
    {
        if (entry.Key.Split('.')[0] == accession)
        {
            return entry; // found
        }
    }

    // error...
    message += "not found accession, " + accession;
    return new KeyValuePair<string, string>(string.Empty, string.Empty);
}
// Splits the variants of seq into runs of consecutive entries in which each variant
// lies at most max positions after the previous one, and collects the combinations
// produced by the index-range overload for every run.
private List<List<Variant>> GetMutations(Fasta seq, int max)
{
    List<List<Variant>> result = new List<List<Variant>>();

    int runStart = 0;
    while (runStart < seq.mVariants.Count)
    {
        // A run always holds at least its first variant; extend it while the gap
        // to the preceding variant stays within max.
        int runEnd = runStart + 1;
        while (runEnd < seq.mVariants.Count
            && (seq.mVariants[runEnd].pos - seq.mVariants[runEnd - 1].pos) <= (ulong)max)
        {
            runEnd++;
        }

        result.AddRange(GetMutations(seq, runStart, runEnd - 1));
        runStart = runEnd;
    }

    return result;
}
// Loads protein records from a FASTA-like file into mSeqs.
// A line starting with '>' opens a record whose ID is the header text up to the
// first '|'. A line starting with "NP_" is parsed as a Variant of the current record
// (duplicates are ignored). Any other non-empty line is appended to the sequence.
// Throws ApplicationException when the file does not start with a header or when a
// record has no sequence.
private void LoadFasta(string path)
{
    //char[] sep = new char[]{'|',' ','\t'};
    char[] sep = new char[] { '|' };

    UnixCfg rd = new UnixCfg(path);
    try
    {
        string line = rd.ReadUnixLine();

        // IsNullOrEmpty: indexing line[0] on an empty first line used to throw
        // IndexOutOfRangeException instead of the intended error.
        if (string.IsNullOrEmpty(line) || line[0] != '>')
        {
            throw new ApplicationException("FASTA header not found");
        }

        Fasta f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
        do
        {
            line = rd.ReadUnixLine();

            // Blank lines carry no data; skip them rather than index into them.
            if (line != null && line.Length == 0)
            {
                continue;
            }

            if (line == null || line[0] == '>') // EOF or next element
            {
                if (f.mSequence.Length == 0)
                {
                    throw new ApplicationException("FASTA sequence not found");
                }

                mSeqs.Add(f);
                if (line != null)
                {
                    f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
                }
            }
            else if (line.StartsWith("NP_", StringComparison.Ordinal)) // Variant
            {
                Variant v = new Variant(line);
                if (!f.mVariants.Contains(v))
                {
                    f.mVariants.Add(v);
                }
            }
            else // Sequence
            {
                f.mSequence += line;
            }
        }
        while (line != null);
    }
    finally
    {
        // Release the reader even when parsing fails.
        rd.Close();
    }
}
// Loads protein entries from a UniProt XML file into mSeqs, dumping each kept entry.
// Entries without sequence data are ignored. When list is null every remaining entry
// is kept. Otherwise an entry is kept only if at least one of its sequence-variant
// features has an id present in list; the variants registered under those ids are
// attached to the entry.
private void LoadXml(string path, SortedList<string, List<Variant>> list)
{
    UniprotXml xml = new UniprotXml(path);
    try
    {
        EhuBio.Database.Ebi.Xml.entry e;
        while ((e = xml.ReadEntry()) != null)
        {
            if (e.sequence == null || e.sequence.Value == null || e.sequence.Value.Length == 0)
            {
                continue;
            }

            // f stays null for entries that must be skipped.
            Fasta f = null;
            if (list == null)
            {
                f = new Fasta(Fasta.Type.Protein, e.accession[0], e.sequence.Value);
            }
            else if (e.feature != null) // an entry without features used to crash the loop
            {
                foreach (EhuBio.Database.Ebi.Xml.featureType feature in e.feature)
                {
                    if (feature.type != EhuBio.Database.Ebi.Xml.featureTypeType.sequencevariant
                        || feature.id == null)
                    {
                        continue;
                    }

                    List<Variant> known;
                    if (!list.TryGetValue(feature.id, out known))
                    {
                        continue;
                    }

                    // Create the record lazily, on the first matching feature.
                    if (f == null)
                    {
                        f = new Fasta(Fasta.Type.Protein, e.accession[0], e.sequence.Value);
                    }

                    f.mVariants.AddRange(known);
                }
            }

            if (f == null)
            {
                continue;
            }

            f.Dump(true);
            mSeqs.Add(f);
        }
    }
    finally
    {
        // Release the reader even when an entry cannot be processed.
        xml.Close();
    }
}
/// <summary>
/// Loads the FASTA data set from the "Rosalind" folder on the desktop and prints,
/// one per line, every string returned by <c>ReadFrames</c> for the first record.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    string[] files = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

    // Assumes only one txt file in this directory - the data set.
    // Every match is read; the content of the last one is what gets used.
    string[] lines = { "" };
    for (int n = 0; n < files.Length; n++)
    {
        lines = File.ReadAllLines(files[n]);
    }

    Fasta fasta = Fasta.DecodeFasta(lines)[0];
    List<string> proteins = ReadFrames(fasta.DNA);

    foreach (string protein in proteins)
    {
        Console.WriteLine(protein);
    }

    Console.ReadLine();
}
/// <summary>
/// Loads the FASTA data set from the "Rosalind" folder on the desktop, runs
/// <c>FindDNAReversePalindrome</c> (arguments 4 and 12) on the first record and
/// prints each returned row on its own line, values separated by spaces.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    string[] files = Directory.GetFiles(desktop + @"\Rosalind", "*.txt");

    // Assumes only one txt file in this directory - the data set
    string[] lines = { "" };
    foreach (string file in files)
    {
        lines = File.ReadAllLines(file);
    }

    Fasta fasta = Fasta.DecodeFasta(lines)[0];
    int?[][] palindromes = FindDNAReversePalindrome(fasta.DNA, 4, 12);

    // The result used to be computed and then dropped (only an empty line was
    // written). NOTE(review): the row layout is not visible here - presumably one
    // row per palindrome; confirm the expected output format.
    if (palindromes != null)
    {
        foreach (int?[] row in palindromes)
        {
            if (row == null)
            {
                continue;
            }

            Console.WriteLine(string.Join(" ", row));
        }
    }

    Console.ReadLine();
}
/// <summary>
/// Solves the GC problem (http://rosalind.info/problems/gc/): finds the entry with
/// the highest GC ratio in the data set and saves its label together with the
/// ratio as a percentage rounded to 6 decimals.
/// </summary>
private static void SolveGc()
{
    var fasta = new Fasta();
    var entries = fasta.ReadEntries(@"Data\GC\rosalind_gc.txt");

    var highestGCRatio = 0m;
    var highestFastaLabel = "";
    foreach (var entry in entries)
    {
        var ratio = entry.Dna.CalculateGcRatio();
        if (ratio > highestGCRatio)
        {
            highestGCRatio = ratio;
            highestFastaLabel = entry.Label;
        }
    }

    highestGCRatio = Math.Round(highestGCRatio * 100, 6);

    // Format with the invariant culture so the decimal separator is always '.'.
    // Replacing ',' with '.' afterwards also rewrote commas inside the label.
    var result = string.Format(
        System.Globalization.CultureInfo.InvariantCulture,
        "{0}\n{1}%",
        highestFastaLabel,
        highestGCRatio);

    SaveResult(@"Results\rosalind_gc_results.txt", result);
}
// Builds the full result list for one sequence:
//   1. the matches of mRegex on the unmodified sequence (entry suffix "-orig");
//   2. a "Lost" copy of an original match for every variant combination inside the
//      match for which GetVariantsResults yields nothing;
//   3. the results obtained with variants applied to the whole sequence, skipping
//      those that repeat an already collected Index/Match pair or get an empty ID;
//      a result whose Index equals no original match is marked as "Gained".
// Returns null (not an empty list) when nothing was collected.
private List<WregexResult> GetTotalVariantsResults( Fasta seq ) {
	List<WregexResult> ret = new List<WregexResult>();
	WregexResult[] orig, mut;
	bool found;
	int i, j;
	List<List<Variant>> mutations;
	string id;

	// Original (without mutations)
	orig = mRegex.Search(seq.mSequence, seq.ID+"-orig").ToArray();
	ret.AddRange( orig );

	// Lost
	foreach( WregexResult r in orig ) {
		/*if( r.Entry.Contains("NP_002968.1") ) Console.WriteLine( "KK" );*/
		// Variant combinations located inside this match
		mutations = GetMutations( seq, r );
		foreach( List<Variant> m in mutations )
			if( GetVariantsResults(seq.ID, r.Match, r.Index, m).Count == 0 ) {
				// NOTE(review): this only leaves the entry already stored in ret
				// untouched if WregexResult is a value type - confirm.
				WregexResult r2 = r;
				r2.Type = ResultType.Lost;
				r2.Entry = r2.Entry.Replace("-orig","-lost") + GetID(m, r, true);
				ret.Add( r2 );
			}
	}

	// Mutations
	mutations = GetMutations( seq, mRegex.MaxLength );
	mut = GetVariantsResults( seq.ID, seq.mSequence, mutations ).ToArray();
	for( i = 0; i < mut.Length; i++ ) {
		// Filter duplicates
		found = false;
		foreach( WregexResult r in ret )
			if( r.Index == mut[i].Index && r.Match.Equals(mut[i].Match) ) {
				found = true;
				break;
			}
		if( found )
			continue;

		// Assign names
		id = GetID ( seq.mVariants, mut[i] );
		if( id.Length == 0 )
			continue;
		mut[i].Entry += id;

		// Gained
		found = false;
		for( j = 0; j < orig.Length; j++ )
			if( mut[i].Index == orig[j].Index ) {
				found = true;
				break;
			}
		if( !found )
			mut[i].Type = ResultType.Gained;
		ret.Add( mut[i] );
	}

	// Callers get null instead of an empty list
	return ret.Count == 0 ? null : ret;
}
// Calls Read() on a reader over empty input. The method body makes no assertion
// on the returned value.
public void ReadReturnsFastaFormat()
{
    StringReader emptyInput = new StringReader("");
    FastaReader reader = new FastaReader(emptyInput);

    reader.Read();
}
/// <summary>
/// Writes a record by forwarding its sequence and description to the
/// two-argument <c>Write</c> overload.
/// </summary>
/// <param name="fasta">Record to write.</param>
public void Write(Fasta fasta)
{
    var sequence = fasta.Sequence;
    var description = fasta.Description;

    Write(sequence, description);
}
// Splits the variants of seq into runs of consecutive entries in which each variant
// lies at most max positions after the previous one, and collects the combinations
// produced by the index-range overload for every run.
private List<List<Variant>> GetMutations( Fasta seq, int max ) {
	List<List<Variant>> result = new List<List<Variant>>();

	int runStart = 0;
	while( runStart < seq.mVariants.Count ) {
		// A run always holds at least its first variant; extend it while the gap
		// to the preceding variant stays within max.
		int runEnd = runStart + 1;
		while( runEnd < seq.mVariants.Count
			&& (seq.mVariants[runEnd].pos-seq.mVariants[runEnd-1].pos) <= (ulong)max )
			runEnd++;

		result.AddRange( GetMutations(seq, runStart, runEnd-1) );
		runStart = runEnd;
	}

	return result;
}
// Builds the full result list for one sequence:
//   1. the matches of mRegex on the unmodified sequence (entry suffix "-orig");
//   2. a "Lost" copy of an original match for every variant combination inside the
//      match for which GetVariantsResults yields nothing;
//   3. the results obtained with variants applied to the whole sequence, skipping
//      those that repeat an already collected Index/Match pair or get an empty ID;
//      a result whose Index equals no original match is marked as "Gained".
// Returns null (not an empty list) when nothing was collected.
private List <WregexResult> GetTotalVariantsResults(Fasta seq)
{
    List <WregexResult> ret = new List <WregexResult>();
    WregexResult[] orig, mut;
    bool found;
    int i, j;
    List <List <Variant> > mutations;
    string id;

    // Original (without mutations)
    orig = mRegex.Search(seq.mSequence, seq.ID + "-orig").ToArray();
    ret.AddRange(orig);

    // Lost
    foreach (WregexResult r in orig)
    {
        /*if( r.Entry.Contains("NP_002968.1") )
         *  Console.WriteLine( "KK" );*/
        // Variant combinations located inside this match
        mutations = GetMutations(seq, r);
        foreach (List <Variant> m in mutations)
        {
            if (GetVariantsResults(seq.ID, r.Match, r.Index, m).Count == 0)
            {
                // NOTE(review): this only leaves the entry already stored in ret
                // untouched if WregexResult is a value type - confirm.
                WregexResult r2 = r;
                r2.Type = ResultType.Lost;
                r2.Entry = r2.Entry.Replace("-orig", "-lost") + GetID(m, r, true);
                ret.Add(r2);
            }
        }
    }

    // Mutations
    mutations = GetMutations(seq, mRegex.MaxLength);
    mut = GetVariantsResults(seq.ID, seq.mSequence, mutations).ToArray();
    for (i = 0; i < mut.Length; i++)
    {
        // Filter duplicates
        found = false;
        foreach (WregexResult r in ret)
        {
            if (r.Index == mut[i].Index && r.Match.Equals(mut[i].Match))
            {
                found = true;
                break;
            }
        }
        if (found)
        {
            continue;
        }

        // Assign names
        id = GetID(seq.mVariants, mut[i]);
        if (id.Length == 0)
        {
            continue;
        }
        mut[i].Entry += id;

        // Gained
        found = false;
        for (j = 0; j < orig.Length; j++)
        {
            if (mut[i].Index == orig[j].Index)
            {
                found = true;
                break;
            }
        }
        if (!found)
        {
            mut[i].Type = ResultType.Gained;
        }
        ret.Add(mut[i]);
    }

    // Callers get null instead of an empty list
    return(ret.Count == 0 ? null : ret);
}
// Enumerates the non-empty subsets of the variants seq.mVariants[i1..i2].
// Within one subset only the first variant seen at a given position is kept, and a
// subset that ends up with the same members as one already collected is dropped.
// Returns an empty list when the index range is empty.
private List<List<Variant>> GetMutations(Fasta seq, int i1, int i2)
{
    List<List<Variant>> result = new List<List<Variant>>();

    int len = i2 - i1 + 1;
    if (len <= 0)
    {
        return result;
    }

    // Every bit pattern from 1 to 2^len - 1 selects one subset.
    // NOTE(review): 1 << len does not fit an int once len reaches 31.
    int combinations = 1 << len;
    for (int mask = 1; mask < combinations; mask++)
    {
        List<Variant> comb = new List<Variant>();

        // Walk the set bits of mask from the least significant one upwards.
        for (int bit = 0; (mask >> bit) != 0; bit++)
        {
            if (((mask >> bit) & 1) == 0)
            {
                continue;
            }

            Variant candidate = seq.mVariants[i1 + bit];
            if (!comb.Exists(kept => kept.pos == candidate.pos))
            {
                comb.Add(candidate);
            }
        }

        // Same size and every member contained means the subset is a repeat.
        bool seen = result.Exists(
            list => list.Count == comb.Count && comb.TrueForAll(v => list.Contains(v)));
        if (!seen)
        {
            result.Add(comb);
        }
    }

    return result;
}
/// <summary>
/// Creates the event data for the given record.
/// </summary>
/// <param name="fasta">Record stored in the <c>Fasta</c> member.</param>
public FastaEvent(Fasta fasta)
{
    this.Fasta = fasta;
}
// Convenience overload: forwards to GetMutations(seq, max) with max = 20000.
private List<List<Variant>> GetMutations(Fasta seq)
{
    const int defaultMax = 20000;

    return GetMutations(seq, defaultMax);
}
// Returns the variant combinations (as produced by the index-range overload) for the
// variants of seq whose position lies inside the match r, i.e. in
// [r.Index, r.Index + r.Length). The scan stops at the first variant at or past the
// end of the match.
private List<List<Variant>> GetMutations( Fasta seq, WregexResult r ) {
	// Skip the variants located before the match.
	int first;
	for( first = 0; first < seq.mVariants.Count; first++ )
		if( (int)seq.mVariants[first].pos >= r.Index )
			break;

	// Advance to the first variant at or beyond the end of the match.
	int pastLast;
	for( pastLast = first; pastLast < seq.mVariants.Count; pastLast++ )
		if( (int)seq.mVariants[pastLast].pos >= (r.Index+r.Length) )
			break;

	return GetMutations( seq, first, pastLast-1 );
}
// Loads protein entries from a UniProt XML file into mSeqs, dumping each kept entry.
// Entries without sequence data are ignored. When list is null every remaining entry
// is kept. Otherwise an entry is kept only if at least one of its sequence-variant
// features has an id present in list; the variants registered under those ids are
// attached to the entry.
private void LoadXml( string path, SortedList<string,List<Variant>> list ) {
	UniprotXml xml = new UniprotXml( path );
	try {
		EhuBio.Database.Ebi.Xml.entry e;
		while( (e=xml.ReadEntry()) != null ) {
			if( e.sequence == null || e.sequence.Value == null || e.sequence.Value.Length == 0 )
				continue;

			// f stays null for entries that must be skipped.
			Fasta f = null;
			if( list == null )
				f = new Fasta( Fasta.Type.Protein, e.accession[0], e.sequence.Value );
			else if( e.feature != null ) {	// an entry without features used to crash the loop
				foreach( EhuBio.Database.Ebi.Xml.featureType feature in e.feature ) {
					if( feature.type != EhuBio.Database.Ebi.Xml.featureTypeType.sequencevariant
						|| feature.id == null )
						continue;

					List<Variant> known;
					if( !list.TryGetValue(feature.id, out known) )
						continue;

					// Create the record lazily, on the first matching feature.
					if( f == null )
						f = new Fasta( Fasta.Type.Protein, e.accession[0], e.sequence.Value );
					f.mVariants.AddRange( known );
				}
			}

			if( f == null )
				continue;
			f.Dump( true );
			mSeqs.Add( f );
		}
	} finally {
		// Release the reader even when an entry cannot be processed.
		xml.Close();
	}
}
// Enumerates the non-empty subsets of the variants seq.mVariants[i1..i2].
// Within one subset only the first variant seen at a given position is kept, and a
// subset that ends up with the same members as one already collected is dropped.
// Returns an empty list when the index range is empty.
private List<List<Variant>> GetMutations( Fasta seq, int i1, int i2 ) {
	List<List<Variant>> result = new List<List<Variant>>();

	int len = i2 - i1 + 1;
	if( len <= 0 )
		return result;

	// Every bit pattern from 1 to 2^len - 1 selects one subset.
	// NOTE(review): 1 << len does not fit an int once len reaches 31.
	int combinations = 1 << len;
	for( int mask = 1; mask < combinations; mask++ ) {
		List<Variant> comb = new List<Variant>();

		// Walk the set bits of mask from the least significant one upwards.
		for( int bit = 0; (mask >> bit) != 0; bit++ ) {
			if( ((mask >> bit) & 1) == 0 )
				continue;

			Variant candidate = seq.mVariants[i1+bit];
			if( !comb.Exists(kept => kept.pos == candidate.pos) )
				comb.Add( candidate );
		}

		// Same size and every member contained means the subset is a repeat.
		bool seen = result.Exists(
			list => list.Count == comb.Count && comb.TrueForAll(v => list.Contains(v)) );
		if( !seen )
			result.Add( comb );
	}

	return result;
}
// Loads protein records from a FASTA-like file into mSeqs.
// A line starting with '>' opens a record whose ID is the header text up to the
// first '|'. A line starting with "NP_" is parsed as a Variant of the current record
// (duplicates are ignored). Any other non-empty line is appended to the sequence.
// Throws ApplicationException when the file does not start with a header or when a
// record has no sequence.
private void LoadFasta( string path ) {
	//char[] sep = new char[]{'|',' ','\t'};
	char[] sep = new char[]{'|'};

	UnixCfg rd = new UnixCfg( path );
	try {
		string line = rd.ReadUnixLine();

		// IsNullOrEmpty: indexing line[0] on an empty first line used to throw
		// IndexOutOfRangeException instead of the intended error.
		if( string.IsNullOrEmpty(line) || line[0] != '>' )
			throw new ApplicationException( "FASTA header not found" );

		Fasta f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
		do {
			line = rd.ReadUnixLine();

			// Blank lines carry no data; skip them rather than index into them.
			if( line != null && line.Length == 0 )
				continue;

			if( line == null || line[0] == '>' ) {	// EOF or next element
				if( f.mSequence.Length == 0 )
					throw new ApplicationException( "FASTA sequence not found" );
				mSeqs.Add(f);
				if( line != null )
					f = new Fasta(Fasta.Type.Protein, line.Split(sep)[0].Substring(1), "");
			} else if( line.StartsWith("NP_", StringComparison.Ordinal) ) {	// Variant
				Variant v = new Variant(line);
				if( !f.mVariants.Contains(v) )
					f.mVariants.Add(v);
			} else	// Sequence
				f.mSequence += line;
		} while( line != null );
	} finally {
		// Release the reader even when parsing fails.
		rd.Close();
	}
}
// Convenience overload: forwards to GetMutations(seq, max) with max = 20000.
private List<List<Variant>> GetMutations( Fasta seq ) {
	const int defaultMax = 20000;

	return GetMutations( seq, defaultMax );
}
// Calls Fasta.Read on a fixed local file. The method body makes no assertion on
// the returned value.
public void ReadTest()
{
    const string fastaPath = @"F:\JICWork\miRNA_no_akr.fasta";

    Fasta.Read(fastaPath);
}
// Reads a gzip-compressed, comma-separated file line by line (skipping the first
// line), collects protein variants parsed from HGVS strings into Fasta records and
// finally dumps every collected record.
//
// NOTE(review): this method looks unfinished. The statement starting with "server."
// is an incomplete member access, and seq, sreq, sres, ssrv, preq, pres and psrv
// are not declared anywhere in this method - confirm whether they are meant to be
// locals (as in runEnsembl-style code) or members declared elsewhere.
// NOTE(review): the reader opened below is never closed in this method.
public static void runCosmic( string file ) {
	StreamReader rd = new StreamReader(new GZipStream(new FileStream(file,FileMode.Open), CompressionMode.Decompress));
	string line;
	char[] sep1 = new char[]{','};	// field separator of the input file
	char[] sep2 = new char[]{':'};	// separates protein id and mutation in an HGVS string
	string[] fields, fields2;
	String mut, prot;
	Variant v;
	Fasta f;
	List<string> ids = new List<string>();	// ids already processed
	SortedList<string,Fasta> fasta = new SortedList<string, Fasta>();	// records by key
	bool first = true;
	//int count = 1;
	while( (line=rd.ReadLine()) != null ) {
		// The first line is skipped
		if( first ) {
			first = false;
			continue;
		}
		fields = line.Split(sep1);
		if( fields[4].Length == 0 )
			continue;
		if( fasta.ContainsKey(fields[4]) )
			f = fasta[fields[4]];
		else {
			WSDBFetchServerService server = new WSDBFetchServerService();
			// NOTE(review): incomplete statement - "server." has no member name and
			// seq is not declared in this method.
			server. f = new Fasta(Fasta.Type.Protein,fields[4],seq);
		}
		v = new Variant();
		// NOTE(review): membership is tested with fields[4] but fields[0] is what
		// gets stored - confirm which column is intended.
		if( ids.Contains(fields[4]) )
			continue;
		ids.Add(fields[0]);
		sreq.id = fields[0];
		sres = ssrv.run_eFetch( sreq );
		if( sres == null || sres.ExchangeSet.Rs == null || sres.ExchangeSet.Rs.Length == 0 )
			continue;
		Console.WriteLine( fields[0] + "..." );
		foreach( string str in sres.ExchangeSet.Rs[0].hgvs ) {
			// Only strings starting with "NP_" are processed
			if( !str.StartsWith("NP_") )
				continue;
			Console.Write( str + " " );
			v = new Variant();
			v.id = str;
			fields2 = str.Split(sep2);
			mut = fields2[1];
			prot = fields2[0];
			// Characters 2-4 and the last three characters of mut are looked up as
			// amino acids; the digits in between, minus one, give the position.
			// Anything that fails to parse is reported as filtered.
			try {
				v.orig = AminoAcid.Get(mut.Substring(2,3)).Letter;
				v.mut = AminoAcid.Get(mut.Substring(mut.Length-3,3)).Letter;
				v.pos = ulong.Parse(mut.Substring(5,mut.Length-8))-1;
			} catch {
				Console.WriteLine( "(filtered)" );
				continue;
			}
			// Reuse a record already fetched for this protein, otherwise fetch it
			if( fasta.ContainsKey(prot) ) {
				Console.WriteLine( "(cached)" );
				f = fasta[prot];
			} else {
				preq.db = "protein";
				preq.id = prot;
				pres = psrv.run_eFetch( preq );
				f = new Fasta(Fasta.Type.Protein,prot+"|"+pres.GBSet[0].GBSeq_definition,pres.GBSet[0].GBSeq_sequence);
				fasta.Add( prot, f );
				Console.WriteLine( "(downloaded)" );
			}
			f.mVariants.Add( v );
		}
		/*if( --count == 0 ) break;*/
	}
	// Dump every collected record
	foreach( Fasta fas in fasta.Values )
		fas.Dump( true );
}
/// <summary>
/// Reads a gzip-compressed, comma-separated file and, for every distinct id in the
/// first column of the accepted lines, fetches its SNP record, turns each HGVS
/// string starting with "NP_" into a protein variant and attaches it to the
/// protein's FASTA record (downloaded once per protein). All collected records
/// are dumped at the end.
/// </summary>
/// <param name="file">Path of the gzip-compressed input file.</param>
public static void runEnsembl(string file)
{
    char[] sep1 = new char[] { ',' };
    char[] sep2 = new char[] { ':' };

    // Set instead of list: the membership test runs once per input line.
    HashSet<string> ids = new HashSet<string>();
    SortedList<string, Fasta> fasta = new SortedList<string, Fasta>();

    eFetchSnpService ssrv = new eFetchSnpService();
    EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest sreq = new EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchRequest();
    EhuBio.Database.Ncbi.eFetch.Snp.MessageEFetchResult sres;
    eFetchSequenceService psrv = new eFetchSequenceService();
    EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest preq = new EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchRequest();
    EhuBio.Database.Ncbi.eFetch.Sequences.MessageEFetchResult pres;

    // Disposing the reader also closes the gzip and file streams underneath it.
    using (StreamReader rd = new StreamReader(new GZipStream(new FileStream(file, FileMode.Open), CompressionMode.Decompress)))
    {
        string line;
        while ((line = rd.ReadLine()) != null)
        {
            string[] fields = line.Split(sep1);

            // Columns 0 and 2-4 are read below; shorter lines cannot be used.
            if (fields.Length < 5)
            {
                continue;
            }

            if (!fields[2].Contains("/") || fields[3].Length == 0 || fields[3] != fields[4])
            {
                continue;
            }

            // Add returns false when the id was already processed.
            if (!ids.Add(fields[0]))
            {
                continue;
            }

            sreq.id = fields[0];
            sres = ssrv.run_eFetch(sreq);
            if (sres == null || sres.ExchangeSet.Rs == null || sres.ExchangeSet.Rs.Length == 0)
            {
                continue;
            }

            Console.WriteLine(fields[0] + "...");
            foreach (string str in sres.ExchangeSet.Rs[0].hgvs)
            {
                if (!str.StartsWith("NP_"))
                {
                    continue;
                }

                Console.Write(str + " ");

                // Expected shape: <protein>:<mutation>
                string[] fields2 = str.Split(sep2);
                if (fields2.Length < 2)
                {
                    Console.WriteLine("(filtered)");
                    continue;
                }

                string prot = fields2[0];
                string mut = fields2[1];

                Variant v = new Variant();
                v.id = str;
                try
                {
                    // Characters 2-4 and the last three characters of mut are looked
                    // up as amino acids; the digits in between, minus one, give the
                    // position.
                    v.orig = AminoAcid.Get(mut.Substring(2, 3)).Letter;
                    v.mut = AminoAcid.Get(mut.Substring(mut.Length - 3, 3)).Letter;
                    v.pos = ulong.Parse(mut.Substring(5, mut.Length - 8)) - 1;
                }
                catch
                {
                    // Deliberately broad: any mutation string that cannot be parsed
                    // is reported and skipped.
                    Console.WriteLine("(filtered)");
                    continue;
                }

                Fasta f;
                if (fasta.TryGetValue(prot, out f))
                {
                    Console.WriteLine("(cached)");
                }
                else
                {
                    preq.db = "protein";
                    preq.id = prot;
                    pres = psrv.run_eFetch(preq);
                    f = new Fasta(Fasta.Type.Protein, prot + "|" + pres.GBSet[0].GBSeq_definition, pres.GBSet[0].GBSeq_sequence);
                    fasta.Add(prot, f);
                    Console.WriteLine("(downloaded)");
                }

                f.mVariants.Add(v);
            }
        }
    }

    foreach (Fasta fas in fasta.Values)
    {
        fas.Dump(true);
    }
}