/// <summary>
/// Parses proteins from a FASTA stream and inserts any sequences and names that are not
/// already stored, reporting progress through <paramref name="progressMonitor"/>.
/// </summary>
/// <param name="reader">Reader over the FASTA text; its base stream position is used to estimate progress.</param>
/// <param name="progressMonitor">
/// Invoked with a status message and a percentage. When it returns false the method
/// returns immediately without committing the transaction.
/// </param>
public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
{
    var knownProteins = new Dictionary<string, ProtIdNames>();
    using (ISession session = OpenWriteSession())
    {
        // Index every stored protein that has an id by its sequence.
        foreach (DbProtein storedProtein in session.CreateCriteria(typeof(DbProtein)).List())
        {
            if (!storedProtein.Id.HasValue)
            {
                continue;
            }
            knownProteins.Add(storedProtein.Sequence,
                new ProtIdNames(storedProtein.Id.Value, storedProtein.Names));
        }

        int proteinCount = 0;
        using (var transaction = session.BeginTransaction())
        using (IDbCommand insertProtein = session.Connection.CreateCommand())
        using (IDbCommand insertName = session.Connection.CreateCommand())
        {
            // just parse, no search for now
            WebEnabledFastaImporter fastaImporter =
                new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

            insertProtein.CommandText = "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
            insertProtein.Parameters.Add(new SQLiteParameter());

            insertName.CommandText = "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
            // One positional parameter per placeholder, in order:
            // Protein id, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
            const int nameParameterCount = 9;
            for (int i = 0; i < nameParameterCount; i++)
            {
                insertName.Parameters.Add(new SQLiteParameter());
            }

            foreach (DbProtein protein in fastaImporter.Import(reader))
            {
                // Estimate progress from the stream position; the +1 keeps the divisor non-zero.
                long position = reader.BaseStream.Position;
                long length = reader.BaseStream.Length;
                int percentComplete = (int)(position * 100 / (length + 1));
                string status = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, proteinCount);
                if (!progressMonitor.Invoke(status, percentComplete))
                {
                    return;
                }

                ProtIdNames target;
                bool existingProtein = knownProteins.TryGetValue(protein.Sequence, out target);
                if (!existingProtein)
                {
                    // New sequence: insert it and remember the row id returned by the command.
                    ((SQLiteParameter)insertProtein.Parameters[0]).Value = protein.Sequence;
                    long newId = Convert.ToInt64(insertProtein.ExecuteScalar());
                    target = new ProtIdNames(newId, new DbProteinName[0]);
                    knownProteins.Add(protein.Sequence, target);
                    proteinCount++;
                }

                foreach (var proteinName in protein.Names)
                {
                    // Only insert names that are not already recorded for this protein
                    bool alreadyNamed = target.Names.Any(
                        dbProteinName => Equals(dbProteinName.Name, proteinName.Name));
                    if (!alreadyNamed)
                    {
                        try
                        {
                            object[] values =
                            {
                                target.Id,
                                // a name is only primary when its protein was newly added
                                proteinName.IsPrimary && !existingProtein,
                                proteinName.Name,
                                proteinName.Description,
                                proteinName.PreferredName,
                                proteinName.Accession,
                                proteinName.Gene,
                                proteinName.Species,
                                proteinName.WebSearchStatus
                            };
                            for (int i = 0; i < values.Length; i++)
                            {
                                ((SQLiteParameter)insertName.Parameters[i]).Value = values[i];
                            }
                            insertName.ExecuteNonQuery();
                        }
                        catch (Exception exception)
                        {
                            // A failed name insert is logged and the import carries on.
                            Console.Out.WriteLine(exception);
                        }
                    }
                }
            }

            if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return;
            }
            transaction.Commit();
        }

        AnalyzeDb(session);
        string finishedMessage = string.Format(
            Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, proteinCount);
        progressMonitor.Invoke(finishedMessage, 100);
    }
}
/// <summary>
/// Reads every file in FastaFilePaths, collects the names and descriptions seen for each
/// distinct protein sequence, passes the result to UpdatePeptides, and then posts a
/// request to close this object via BeginInvoke unless it is already disposed.
/// Returns early when UpdateProgress returns false.
/// Always clears <c>_running</c> and pulses waiters on exit.
/// </summary>
private void DoWork()
{
    try
    {
        // just do the basic name+description parsing, no regex or web access - we don't use extended metadata here
        var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.FakeWebSearchProvider());
        var proteins = new Dictionary<string, ProteinData>();
        foreach (var path in FastaFilePaths)
        {
            var statusText = "Reading FASTA file " + Path.GetFileName(path);
            var fileInfo = new FileInfo(path);
            // Import from the same reader whose position drives the progress value, and
            // dispose it when done. Previously a second, separate reader was handed to the
            // importer, so the reported position never advanced and neither reader was closed.
            using (var reader = File.OpenText(path))
            {
                foreach (var protein in fastaImporter.Import(reader))
                {
                    int percentComplete = (int) (reader.BaseStream.Position * 100 / fileInfo.Length);
                    if (!UpdateProgress(statusText, percentComplete))
                    {
                        return;
                    }

                    ProteinData proteinData;
                    if (!proteins.TryGetValue(protein.Sequence, out proteinData))
                    {
                        proteinData = new ProteinData(protein.Sequence);
                        proteins.Add(protein.Sequence, proteinData);
                    }
                    foreach (var name in protein.Names)
                    {
                        proteinData.AddName(name.Name, name.Description);
                    }
                }
            }
        }

        UpdatePeptides(proteins);
        if (!IsDisposed)
        {
            BeginInvoke(new Action(Close));
        }
    }
    finally
    {
        // NOTE(review): the lock object is left as `this` because code elsewhere presumably
        // waits on the same monitor - confirm before switching to a private lock object.
        lock (this)
        {
            _running = false;
            Monitor.PulseAll(this);
        }
    }
}