/// <summary>
/// Imports the proteins parsed from a FASTA stream into the proteome database.
/// Proteins already in the database are matched by sequence; a record whose
/// sequence is already known only contributes names that are not yet stored
/// for that protein. All inserts happen inside a single transaction.
/// </summary>
/// <param name="reader">Reader over the FASTA text. Its base stream's Position
/// and Length are used to compute the progress percentage.</param>
/// <param name="progressMonitor">Called with a status message and a percentage.
/// When it returns false the method returns immediately and the transaction is
/// disposed without being committed.</param>
public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
{
    // Every protein known so far (loaded from the database or inserted below), keyed by sequence.
    Dictionary<string, ProtIdNames> proteinIds = new Dictionary<string, ProtIdNames>();
    // Names inserted by this call, keyed by protein sequence. ProtIdNames.Names is a snapshot
    // taken when the entry was created, so without this a record repeated in the FASTA file
    // would issue the same name insert again.
    Dictionary<string, HashSet<string>> insertedNames = new Dictionary<string, HashSet<string>>();
    using (ISession session = OpenWriteSession())
    {
        foreach (DbProtein protein in session.CreateCriteria(typeof(DbProtein)).List())
        {
            if (protein.Id.HasValue)
            {
                proteinIds.Add(protein.Sequence, new ProtIdNames(protein.Id.Value, protein.Names));
            }
        }
        int proteinCount = 0;
        using (var transaction = session.BeginTransaction())
        using (IDbCommand insertProtein = session.Connection.CreateCommand())
        using (IDbCommand insertName = session.Connection.CreateCommand())
        {
            WebEnabledFastaImporter fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider()); // just parse, no search for now

            insertProtein.CommandText = "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
            SQLiteParameter sequenceParameter = new SQLiteParameter();
            insertProtein.Parameters.Add(sequenceParameter);

            insertName.CommandText = "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
            // Positional parameters, in order:
            // Id, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchInfo
            SQLiteParameter[] nameParameters = new SQLiteParameter[9];
            for (int i = 0; i < nameParameters.Length; i++)
            {
                nameParameters[i] = new SQLiteParameter();
                insertName.Parameters.Add(nameParameters[i]);
            }

            foreach (DbProtein protein in fastaImporter.Import(reader))
            {
                // Length + 1 avoids dividing by zero on an empty stream and keeps the value below 100.
                int iProgress = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                if (!progressMonitor.Invoke(string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, proteinCount), iProgress))
                {
                    return;
                }

                bool existingProtein = false;
                ProtIdNames proteinIdNames;
                if (proteinIds.TryGetValue(protein.Sequence, out proteinIdNames))
                {
                    existingProtein = true;
                }
                else
                {
                    sequenceParameter.Value = protein.Sequence;
                    // The command text ends with "select last_insert_rowid()", so the scalar is the new row id.
                    proteinIdNames = new ProtIdNames(Convert.ToInt64(insertProtein.ExecuteScalar()), new DbProteinName[0]);
                    proteinIds.Add(protein.Sequence, proteinIdNames);
                    proteinCount++;
                }

                HashSet<string> namesInsertedForProtein;
                if (!insertedNames.TryGetValue(protein.Sequence, out namesInsertedForProtein))
                {
                    namesInsertedForProtein = new HashSet<string>();
                    insertedNames.Add(protein.Sequence, namesInsertedForProtein);
                }

                foreach (var proteinName in protein.Names)
                {
                    // Skip any names that already exist, whether they were present before this
                    // call or were inserted earlier in this same import.
                    if (namesInsertedForProtein.Contains(proteinName.Name) ||
                        proteinIdNames.Names.Any(dbProteinName => Equals(dbProteinName.Name, proteinName.Name)))
                    {
                        continue;
                    }
                    try
                    {
                        nameParameters[0].Value = proteinIdNames.Id;
                        // Names added to a protein that was already known are never marked primary.
                        nameParameters[1].Value = proteinName.IsPrimary && !existingProtein;
                        nameParameters[2].Value = proteinName.Name;
                        nameParameters[3].Value = proteinName.Description;
                        nameParameters[4].Value = proteinName.PreferredName;
                        nameParameters[5].Value = proteinName.Accession;
                        nameParameters[6].Value = proteinName.Gene;
                        nameParameters[7].Value = proteinName.Species;
                        nameParameters[8].Value = proteinName.WebSearchStatus; // represent as a string for ease of serialization
                        insertName.ExecuteNonQuery();
                        // Record the name only after a successful insert so a failed insert can be retried.
                        namesInsertedForProtein.Add(proteinName.Name);
                    }
                    catch (Exception exception)
                    {
                        // Best effort: a name that cannot be inserted is reported and the import continues.
                        Console.Out.WriteLine(exception);
                    }
                }
            }

            if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return;
            }
            transaction.Commit();
        }
        AnalyzeDb(session);
        progressMonitor.Invoke(
            string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, proteinCount), 100);
    }
}
/// <summary>
/// Imports the proteins parsed from a FASTA stream into the proteome database
/// using a stateless session. Proteins already in the database are matched by
/// sequence; a record whose sequence is already known only contributes names
/// that are not yet stored for that protein. All inserts, and the digestion
/// step when the subsequences table exists, happen inside a single transaction.
/// </summary>
/// <param name="reader">Reader over the FASTA text. Its base stream's Position
/// and Length are used to compute the progress percentage.</param>
/// <param name="progressMonitor">Receives progress updates and is polled for
/// cancellation. On cancellation the method returns and the transaction is
/// disposed without being committed.</param>
/// <param name="status">Progress status passed by reference to the progress
/// and digestion helpers.</param>
/// <param name="delayAnalyzeDb">When true, AnalyzeDb is not run at the end of
/// the import.</param>
/// <param name="duplicateSequenceCount">Number of imported records whose
/// sequence was already known, either from the database or from an earlier
/// record in the same import.</param>
public void AddFastaFile(StreamReader reader, IProgressMonitor progressMonitor, ref IProgressStatus status, bool delayAnalyzeDb, out int duplicateSequenceCount)
{
    // Every protein known so far (loaded from the database or inserted below), keyed by sequence.
    Dictionary<string, ProtIdNames> proteinIds = new Dictionary<string, ProtIdNames>();
    // Names inserted by this call, keyed by protein sequence. ProtIdNames.Names is a snapshot
    // taken when the entry was created, so without this a record repeated in the FASTA file
    // would issue the same name insert again.
    Dictionary<string, HashSet<string>> insertedNames = new Dictionary<string, HashSet<string>>();
    using (IStatelessSession session = SessionFactory.OpenStatelessSession()) // This is a long session, but there's no harm since db is useless till its done
    {
        // NOTE(review): this lookup is keyed by the name row's own Id but is queried below with
        // the protein's Id. Confirm whether the key should be the owning protein's id instead.
        var proteinNames = session.CreateCriteria<DbProteinName>().List<DbProteinName>().ToLookup(name => name.Id.Value);
        foreach (DbProtein protein in session.CreateCriteria(typeof(DbProtein)).List())
        {
            if (protein.Id.HasValue)
            {
                proteinIds.Add(protein.Sequence, new ProtIdNames(protein.Id.Value, proteinNames[protein.Id.Value].ToArray()));
            }
        }
        int proteinCount = 0;
        duplicateSequenceCount = 0;
        using (var transaction = session.BeginTransaction())
        using (IDbCommand insertProtein = session.Connection.CreateCommand())
        using (IDbCommand insertName = session.Connection.CreateCommand())
        {
            WebEnabledFastaImporter fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider()); // just parse, no search for now

            insertProtein.CommandText = "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
            SQLiteParameter sequenceParameter = new SQLiteParameter();
            insertProtein.Parameters.Add(sequenceParameter);

            insertName.CommandText = "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
            // Positional parameters, in order:
            // Id, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchInfo
            SQLiteParameter[] nameParameters = new SQLiteParameter[9];
            for (int i = 0; i < nameParameters.Length; i++)
            {
                nameParameters[i] = new SQLiteParameter();
                insertName.Parameters.Add(nameParameters[i]);
            }

            foreach (DbProtein protein in fastaImporter.Import(reader))
            {
                // Length + 1 avoids dividing by zero on an empty stream and keeps the value below 100.
                int iProgress = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, proteinCount), iProgress))
                {
                    return;
                }

                bool existingProtein = false;
                ProtIdNames proteinIdNames;
                if (proteinIds.TryGetValue(protein.Sequence, out proteinIdNames))
                {
                    existingProtein = true;
                    duplicateSequenceCount++;
                }
                else
                {
                    sequenceParameter.Value = protein.Sequence;
                    // The command text ends with "select last_insert_rowid()", so the scalar is the new row id.
                    proteinIdNames = new ProtIdNames(Convert.ToInt64(insertProtein.ExecuteScalar()), new DbProteinName[0]);
                    proteinIds.Add(protein.Sequence, proteinIdNames);
                    proteinCount++;
                }

                HashSet<string> namesInsertedForProtein;
                if (!insertedNames.TryGetValue(protein.Sequence, out namesInsertedForProtein))
                {
                    namesInsertedForProtein = new HashSet<string>();
                    insertedNames.Add(protein.Sequence, namesInsertedForProtein);
                }

                foreach (var proteinName in protein.Names)
                {
                    // Skip any names that already exist, whether they were present before this
                    // call or were inserted earlier in this same import.
                    if (namesInsertedForProtein.Contains(proteinName.Name) ||
                        proteinIdNames.Names.Any(dbProteinName => Equals(dbProteinName.Name, proteinName.Name)))
                    {
                        continue;
                    }
                    try
                    {
                        nameParameters[0].Value = proteinIdNames.Id;
                        // Names added to a protein that was already known are never marked primary.
                        nameParameters[1].Value = proteinName.IsPrimary && !existingProtein;
                        nameParameters[2].Value = proteinName.Name;
                        nameParameters[3].Value = proteinName.Description;
                        nameParameters[4].Value = proteinName.PreferredName;
                        nameParameters[5].Value = proteinName.Accession;
                        nameParameters[6].Value = proteinName.Gene;
                        nameParameters[7].Value = proteinName.Species;
                        nameParameters[8].Value = proteinName.WebSearchStatus; // represent as a string for ease of serialization
                        insertName.ExecuteNonQuery();
                        // Record the name only after a successful insert so a failed insert can be retried.
                        namesInsertedForProtein.Add(proteinName.Name);
                    }
                    catch (Exception exception)
                    {
                        // Best effort: a name that cannot be inserted is reported and the import continues.
                        Console.Out.WriteLine(exception);
                    }
                }
            }

            if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return;
            }
            if (HasSubsequencesTable(() => session.Connection))
            {
                DigestProteins(session.Connection, proteinIds, progressMonitor, ref status);
            }
            // Checked again after the digestion call and before committing.
            if (progressMonitor.IsCanceled)
            {
                return;
            }
            transaction.Commit();
        }
        if (!delayAnalyzeDb)
        {
            AnalyzeDb(session.Connection); // NB This runs asynchronously and may interfere with further writes
        }
        UpdateProgressAndCheckForCancellation(progressMonitor, ref status,
            string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, proteinCount), 100);
    }
}