/// <summary>
/// Access the web to resolve protein metadata not directly found in fasta file.
/// The fasta text importer will have left search hints in ProteinMetadata.
/// </summary>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return cancels the lookup</param>
/// <param name="fastaImporter">object that accesses the web, or pretends to if in a test</param>
/// <param name="polite">if true, don't try to resolve everything in one go, assume we can come back later</param>
/// <returns>true on success, false if the progress monitor asked to cancel</returns>
public bool LookupProteinMetadata(ProgressMonitor progressMonitor, WebEnabledFastaImporter fastaImporter, bool polite = false)
{
    // NOTE(review): "polite" is not read anywhere in this method body - confirm whether it
    // was meant to limit the number of lookup rounds performed below.
    var unsearchedProteins = new List<ProteinSearchInfo>();
    List<DbProteinName> untaggedProteins;
    using (ISession session = OpenSession())
    {
        if (!progressMonitor.Invoke(Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
        {
            return false;
        }

        // get a list of proteins with unresolved metadata websearches
        var proteinNames = session.CreateCriteria(typeof(DbProteinName)).List<DbProteinName>();
        var proteinsToSearch = proteinNames
            .Where(proteinName => (proteinName.GetProteinMetadata().GetPendingSearchTerm().Length > 0))
            .ToList();

        // and a list of proteins which have never been considered for metadata search
        untaggedProteins = proteinNames.Where(proteinName => proteinName.WebSearchInfo.IsEmpty()).ToList();
        foreach (var untaggedProtein in untaggedProteins)
        {
            untaggedProtein.SetWebSearchCompleted(); // by default take this out of consideration for next time
            var metadata = untaggedProtein.GetProteinMetadata();
            if (metadata.HasMissingMetadata())
            {
                var search = fastaImporter.ParseProteinMetaData(metadata);
                if (search != null)
                {
                    metadata = untaggedProtein.ChangeProteinMetadata(metadata.Merge(search)); // don't stomp name by accident
                    metadata = untaggedProtein.ChangeProteinMetadata(metadata.ChangeWebSearchInfo(search.WebSearchInfo));
                }
            }
            if (metadata.NeedsSearch())
            {
                proteinsToSearch.Add(untaggedProtein); // add to the list of things to commit back to the db
            }
        }

        // Get the lengths of the sequences without getting the sequences themselves, for best speed
        var proteinIds = proteinsToSearch.Select(name => name.Protein.Id.Value).Distinct().ToArray();
        var proteinLengths = new Dictionary<long, int>();
        using (var cmd = session.Connection.CreateCommand())
        {
            string sql = "SELECT Id, LENGTH(Sequence) AS SequenceLength FROM ProteomeDbProtein P"; // Not L10N
            // Only restrict by id when the list is short; otherwise every protein's length is read.
            if (proteinIds.Length < 1000)
            {
                sql += " WHERE P.Id IN (" + // Not L10N
                       string.Join(",", proteinIds) + ")"; // Not L10N
            }
            cmd.CommandText = sql;
            using (var reader = cmd.ExecuteReader())
            {
                while (reader.Read())
                {
                    var id = reader.GetValue(0);
                    var len = reader.GetValue(1);
                    proteinLengths.Add(Convert.ToInt64(id), Convert.ToInt32(len));
                }
            }
        }
        foreach (var p in proteinsToSearch)
        {
            int length; // stays 0 when no length was read for this protein id
            proteinLengths.TryGetValue(p.Protein.Id.GetValueOrDefault(), out length);
            unsearchedProteins.Add(new ProteinSearchInfo(p, length));
        }
    }

    // did any get set as unsearchable? Evaluate the filter once and reuse the result.
    var unsearchableProteins = untaggedProteins
        .Where(untagged => !untagged.GetProteinMetadata().NeedsSearch())
        .ToList();
    if (unsearchableProteins.Count > 0)
    {
        // Write back the ones that were formerly without search terms, but which now indicate no search is possible
        using (ISession session = OpenWriteSession())
        using (var transaction = session.BeginTransaction())
        {
            foreach (var untagged in unsearchableProteins)
            {
                session.SaveOrUpdate(untagged); // update the metadata
            }
            transaction.Commit();
        }
    }

    if (unsearchedProteins.Any())
    {
        int resultsCount = 0;
        int unsearchedCount = unsearchedProteins.Count;
        for (bool success = true; success;)
        {
            success = false; // Until we see at least one succeed this round
            var results = new List<DbProteinName>();

            // The "true" arg means "do just one batch then return"
            foreach (var result in fastaImporter.DoWebserviceLookup(unsearchedProteins, null, true))
            {
                if (result != null)
                {
                    if (!progressMonitor.Invoke(
                            string.Format(
                                Resources.ProteomeDb_LookupProteinMetadata_Retrieving_details_for__0__proteins,
                                unsearchedProteins.Count),
                            100 * resultsCount++ / unsearchedCount))
                    {
                        return false;
                    }
                    success = true;
                    results.Add(result.ProteinDbInfo);
                }
            }

            if (results.Any()) // save this batch
            {
                using (var session = OpenWriteSession())
                using (var transaction = session.BeginTransaction())
                {
                    foreach (var result in results)
                    {
                        session.SaveOrUpdate(result);
                    }
                    transaction.Commit();
                    session.Close();
                }
            }

            // Edit this list rather than rederive with database access.
            // A single RemoveAll pass replaces the former per-item Remove calls.
            unsearchedProteins.RemoveAll(p => !p.GetProteinMetadata().NeedsSearch());
        }
    }
    return true;
}
/// <summary>
/// Digests every protein of the organism returned by GetEntity with the given protease,
/// and stores the peptides and their peptide-to-protein links under a new digestion record.
/// Peptides longer than MAX_PEPTIDE_LENGTH are skipped.
/// </summary>
/// <param name="protease">cleaves each protein; also supplies MaxMissedCleavages for the new record</param>
/// <param name="name">name stored on the new digestion record</param>
/// <param name="description">description stored on the new digestion record</param>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return cancels the digest</param>
/// <returns>the new digestion, or null if the progress monitor asked to cancel</returns>
public Digestion Digest(IProtease protease, String name, String description, ProgressMonitor progressMonitor)
{
    using (ISession session = ProteomeDb.OpenWriteSession())
    {
        DbOrganism organism = GetEntity(session);

        // The transaction and both commands are disposed deterministically; Commit is only
        // reached at the very end, so the cancel paths below leave nothing committed.
        using (var transaction = session.BeginTransaction())
        {
            DbDigestion digestion = new DbDigestion
            {
                Name = name,
                Description = description,
                Organism = organism,
                MaxMissedCleavages = protease.MaxMissedCleavages
            };
            session.Save(digestion);
            if (!progressMonitor.Invoke("Listing proteins", 0))
            {
                return null;
            }

            List<DbProtein> proteins = new List<DbProtein>(organism.Proteins);
            // Maps each peptide sequence already inserted during this run to its row id.
            Dictionary<String, long> digestedPeptideIds = new Dictionary<string, long>();
            const String sqlPeptide =
                "INSERT INTO ProteomeDbDigestedPeptide (Digestion, MissedCleavages, Sequence, Version) VALUES(@Digestion,@MissedCleavages,@Sequence,1);select last_insert_rowid();";
            const String sqlPeptideProtein =
                "INSERT INTO ProteomeDbDigestedPeptideProtein (StartIndex, Peptide, Protein, Version) VALUES(?,?,?,1);";

            using (var commandPeptide = session.Connection.CreateCommand())
            using (var commandProtein = session.Connection.CreateCommand())
            {
                commandPeptide.CommandText = sqlPeptide;
                commandPeptide.Parameters.Add(new SQLiteParameter("@Digestion"));
                commandPeptide.Parameters.Add(new SQLiteParameter("@MissedCleavages"));
                commandPeptide.Parameters.Add(new SQLiteParameter("@Sequence"));

                commandProtein.CommandText = sqlPeptideProtein;
                commandProtein.Parameters.Add(new SQLiteParameter("@StartIndex"));
                commandProtein.Parameters.Add(new SQLiteParameter("@Peptide"));
                commandProtein.Parameters.Add(new SQLiteParameter("@Protein"));

                // The message does not change between iterations, so build it once.
                string digestingMessage = "Digesting " + proteins.Count + " proteins";
                for (int i = 0; i < proteins.Count; i++)
                {
                    if (!progressMonitor.Invoke(digestingMessage, 100 * i / proteins.Count))
                    {
                        return null;
                    }
                    Protein protein = new Protein(this, proteins[i]);
                    foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                    {
                        if (digestedPeptide.Sequence.Length > MAX_PEPTIDE_LENGTH)
                        {
                            continue;
                        }

                        // Insert the peptide row only the first time its sequence is seen.
                        long digestedPeptideId;
                        if (!digestedPeptideIds.TryGetValue(digestedPeptide.Sequence, out digestedPeptideId))
                        {
                            ((SQLiteParameter)commandPeptide.Parameters[0]).Value = digestion.Id;
                            ((SQLiteParameter)commandPeptide.Parameters[1]).Value = digestedPeptide.MissedCleavages;
                            ((SQLiteParameter)commandPeptide.Parameters[2]).Value = digestedPeptide.Sequence;
                            digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                            digestedPeptideIds.Add(digestedPeptide.Sequence, digestedPeptideId);
                        }

                        // Always record where this peptide occurs in the current protein.
                        ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptide.Index;
                        ((SQLiteParameter)commandProtein.Parameters[1]).Value = digestedPeptideId;
                        ((SQLiteParameter)commandProtein.Parameters[2]).Value = proteins[i].Id;
                        commandProtein.ExecuteNonQuery();
                    }
                }
            }

            if (!progressMonitor.Invoke("Committing transaction", 99))
            {
                return null;
            }
            transaction.Commit();
            progressMonitor.Invoke(
                "Digested " + proteins.Count + " proteins into " + digestedPeptideIds.Count + " unique peptides",
                100);
            return new Digestion(this, digestion);
        }
    }
}
/// <summary>
/// Digests every stored protein with the given protease and records the resulting peptides
/// and peptide-to-protein links under the digestion named after the protease.
/// An existing digestion whose MaxSequenceLength already reaches MAX_SEQUENCE_LENGTH is
/// returned unchanged; otherwise it is extended, or a new digestion is created.
/// Peptides shorter than the digestion's MinSequenceLength are skipped and longer ones are
/// truncated to its MaxSequenceLength.
/// </summary>
/// <param name="protease">cleaves each protein; its Name identifies the digestion record</param>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return cancels the digest</param>
/// <returns>the digestion, or null if the progress monitor asked to cancel</returns>
public Digestion Digest(IProtease protease, ProgressMonitor progressMonitor)
{
    using (ISession session = OpenWriteSession())
    {
        DbDigestion dbDigestion = GetDbDigestion(protease.Name);
        HashSet<string> existingSequences = new HashSet<string>();
        using (var transaction = session.BeginTransaction())
        {
            if (dbDigestion != null)
            {
                if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                {
                    return new Digestion(this, dbDigestion);
                }
                if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                {
                    return null;
                }
                // Remember what the earlier digestion stored so those sequences are not inserted again.
                IQuery query = session.CreateQuery("SELECT P.Sequence FROM " // Not L10N
                                                   + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion") // Not L10N
                    .SetParameter("Digestion", dbDigestion); // Not L10N
                List<String> listSequences = new List<string>();
                query.List(listSequences);
                existingSequences.UnionWith(listSequences);
                dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                session.Update(dbDigestion);
            }
            else
            {
                dbDigestion = new DbDigestion
                {
                    Name = protease.Name,
                    MinSequenceLength = MIN_SEQUENCE_LENGTH,
                    MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                };
                session.Save(dbDigestion);
            }
            if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_proteins, 0))
            {
                return null;
            }

            List<DbProtein> proteins = new List<DbProtein>();
            session.CreateCriteria(typeof(DbProtein)).List(proteins);
            // Maps each truncated sequence inserted during this run to its row id.
            Dictionary<String, long> digestedPeptideIds = new Dictionary<string, long>();
            const String sqlPeptide =
                "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();"; // Not L10N
            const String sqlPeptideProtein =
                "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
            using (var commandPeptide = session.Connection.CreateCommand())
            using (var commandProtein = session.Connection.CreateCommand())
            {
                commandPeptide.CommandText = sqlPeptide;
                commandPeptide.Parameters.Add(new SQLiteParameter());
                commandPeptide.Parameters.Add(new SQLiteParameter());

                // Exactly one parameter per "?" placeholder: Peptide, Protein.
                commandProtein.CommandText = sqlPeptideProtein;
                commandProtein.Parameters.Add(new SQLiteParameter());
                commandProtein.Parameters.Add(new SQLiteParameter());

                // The message does not change between iterations, so format it once.
                string digestingMessage = string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins, proteins.Count);
                for (int i = 0; i < proteins.Count; i++)
                {
                    // Truncated sequences already linked to the current protein.
                    var proteinSequences = new HashSet<string>();
                    if (!progressMonitor.Invoke(digestingMessage, 100 * i / proteins.Count))
                    {
                        return null;
                    }
                    Protein protein = new Protein(ProteomeDbPath, proteins[i]);
                    foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                    {
                        if (digestedPeptide.Sequence.Length < dbDigestion.MinSequenceLength)
                        {
                            continue;
                        }
                        String truncatedSequence = digestedPeptide.Sequence.Substring(
                            0, Math.Min(digestedPeptide.Sequence.Length, dbDigestion.MaxSequenceLength));
                        if (existingSequences.Contains(truncatedSequence))
                        {
                            continue;
                        }
                        // HashSet.Add returns false for a repeat, so link each sequence once per protein.
                        if (!proteinSequences.Add(truncatedSequence))
                        {
                            continue;
                        }

                        long digestedPeptideId;
                        if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                        {
                            ((SQLiteParameter)commandPeptide.Parameters[0]).Value = dbDigestion.Id;
                            ((SQLiteParameter)commandPeptide.Parameters[1]).Value = truncatedSequence;
                            digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                            digestedPeptideIds.Add(truncatedSequence, digestedPeptideId);
                        }
                        ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptideId;
                        ((SQLiteParameter)commandProtein.Parameters[1]).Value = protein.Id;
                        commandProtein.ExecuteNonQuery();
                    }
                }
            }
            if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return null;
            }
            transaction.Commit();

            AnalyzeDb(session);
            progressMonitor.Invoke(
                string.Format(Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides,
                    proteins.Count, digestedPeptideIds.Count), 100);
        }
        return new Digestion(this, dbDigestion);
    }
}
/// <summary>
/// Imports the proteins parsed from a FASTA stream into the database.
/// A sequence that is already stored is not inserted again; for such a protein only names
/// not yet recorded are added, and they are never flagged as primary.
/// A failure while inserting a single name is written to the console and the import continues.
/// </summary>
/// <param name="reader">FASTA text; the position of its BaseStream is used to estimate progress</param>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return abandons the import before the commit</param>
public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
{
    // Everything already stored, keyed by sequence, so repeats can be recognised.
    var knownProteins = new Dictionary<string, ProtIdNames>();
    using (ISession session = OpenWriteSession())
    {
        foreach (DbProtein stored in session.CreateCriteria(typeof(DbProtein)).List())
        {
            if (!stored.Id.HasValue)
            {
                continue;
            }
            knownProteins.Add(stored.Sequence, new ProtIdNames(stored.Id.Value, stored.Names));
        }

        int addedCount = 0;
        using (var transaction = session.BeginTransaction())
        using (IDbCommand insertProtein = session.Connection.CreateCommand())
        using (IDbCommand insertName = session.Connection.CreateCommand())
        {
            // just parse, no search for now
            var importer = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

            insertProtein.CommandText =
                "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
            insertProtein.Parameters.Add(new SQLiteParameter());

            insertName.CommandText =
                "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
            // One positional parameter per placeholder, in this order:
            // protein Id, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
            for (int iParam = 0; iParam < 9; iParam++)
            {
                insertName.Parameters.Add(new SQLiteParameter());
            }

            foreach (DbProtein parsed in importer.Import(reader))
            {
                int percent = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                string status = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, addedCount);
                if (!progressMonitor.Invoke(status, percent))
                {
                    return;
                }

                ProtIdNames idNames;
                bool alreadyStored = knownProteins.TryGetValue(parsed.Sequence, out idNames);
                if (!alreadyStored)
                {
                    // First sighting of this sequence: insert it and remember the new row id.
                    ((SQLiteParameter)insertProtein.Parameters[0]).Value = parsed.Sequence;
                    long newProteinId = Convert.ToInt64(insertProtein.ExecuteScalar());
                    idNames = new ProtIdNames(newProteinId, new DbProteinName[0]);
                    knownProteins.Add(parsed.Sequence, idNames);
                    addedCount++;
                }

                foreach (var parsedName in parsed.Names)
                {
                    // Skip any names that already exist
                    bool nameKnown = idNames.Names.Any(known => Equals(known.Name, parsedName.Name));
                    if (nameKnown)
                    {
                        continue;
                    }
                    try
                    {
                        object[] values =
                        {
                            idNames.Id,
                            parsedName.IsPrimary && !alreadyStored,
                            parsedName.Name,
                            parsedName.Description,
                            parsedName.PreferredName,
                            parsedName.Accession,
                            parsedName.Gene,
                            parsedName.Species,
                            parsedName.WebSearchStatus // represent as a string for ease of serialization
                        };
                        for (int iValue = 0; iValue < values.Length; iValue++)
                        {
                            ((SQLiteParameter)insertName.Parameters[iValue]).Value = values[iValue];
                        }
                        insertName.ExecuteNonQuery();
                    }
                    catch (Exception exception)
                    {
                        Console.Out.WriteLine(exception);
                    }
                }
            }

            if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return;
            }
            transaction.Commit();
        }

        AnalyzeDb(session);
        string finished = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, addedCount);
        progressMonitor.Invoke(finished, 100);
    }
}
/// <summary>
/// Access the web to resolve protein metadata not directly found in fasta file.
/// The fasta text importer will have left search hints in ProteinMetadata.
/// </summary>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return cancels the lookup</param>
/// <param name="fastaImporter">object that accesses the web, or pretends to if in a test</param>
/// <param name="polite">if true, don't try to resolve everything in one go, assume we can come back later</param>
/// <returns>true on success, false if the progress monitor asked to cancel</returns>
public bool LookupProteinMetadata(ProgressMonitor progressMonitor, WebEnabledFastaImporter fastaImporter, bool polite = false)
{
    // NOTE(review): "polite" is not read anywhere in this method body - confirm whether it
    // was meant to limit the number of lookup rounds performed below.
    var unsearchedProteins = new List<ProteinSearchInfo>();
    List<DbProteinName> untaggedProteins;
    using (ISession session = OpenSession())
    {
        if (!progressMonitor.Invoke(Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
        {
            return false;
        }

        // get a list of proteins with unresolved metadata websearches
        var proteinNames = session.CreateCriteria(typeof(DbProteinName)).List<DbProteinName>();
        var proteinsToSearch = proteinNames
            .Where(proteinName => (proteinName.GetProteinMetadata().GetPendingSearchTerm().Length > 0))
            .ToList();

        // and a list of proteins which have never been considered for metadata search
        untaggedProteins = proteinNames.Where(proteinName => proteinName.WebSearchInfo.IsEmpty()).ToList();
        foreach (var untaggedProtein in untaggedProteins)
        {
            untaggedProtein.SetWebSearchCompleted(); // by default take this out of consideration for next time
            var metadata = untaggedProtein.GetProteinMetadata();
            if (metadata.HasMissingMetadata())
            {
                var search = fastaImporter.ParseProteinMetaData(metadata);
                if (search != null)
                {
                    metadata = untaggedProtein.ChangeProteinMetadata(metadata.Merge(search)); // don't stomp name by accident
                    metadata = untaggedProtein.ChangeProteinMetadata(metadata.ChangeWebSearchInfo(search.WebSearchInfo));
                }
            }
            if (metadata.NeedsSearch())
            {
                proteinsToSearch.Add(untaggedProtein); // add to the list of things to commit back to the db
            }
        }

        // Get the lengths of the sequences without getting the sequences themselves, for best speed
        var proteinIds = proteinsToSearch.Select(name => name.Protein.Id.Value).Distinct().ToArray();
        var proteinLengths = new Dictionary<long, int>();
        using (var cmd = session.Connection.CreateCommand())
        {
            string sql = "SELECT Id, LENGTH(Sequence) AS SequenceLength FROM ProteomeDbProtein P"; // Not L10N
            // Only restrict by id when the list is short; otherwise every protein's length is read.
            if (proteinIds.Length < 1000)
            {
                sql += " WHERE P.Id IN (" + // Not L10N
                       string.Join(",", proteinIds) + ")"; // Not L10N
            }
            cmd.CommandText = sql;
            using (var reader = cmd.ExecuteReader())
            {
                while (reader.Read())
                {
                    var id = reader.GetValue(0);
                    var len = reader.GetValue(1);
                    proteinLengths.Add(Convert.ToInt64(id), Convert.ToInt32(len));
                }
            }
        }
        foreach (var p in proteinsToSearch)
        {
            int length; // stays 0 when no length was read for this protein id
            proteinLengths.TryGetValue(p.Protein.Id.GetValueOrDefault(), out length);
            unsearchedProteins.Add(new ProteinSearchInfo(p, length));
        }
    }

    // did any get set as unsearchable? Evaluate the filter once and reuse the result.
    var unsearchableProteins = untaggedProteins
        .Where(untagged => !untagged.GetProteinMetadata().NeedsSearch())
        .ToList();
    if (unsearchableProteins.Count > 0)
    {
        // Write back the ones that were formerly without search terms, but which now indicate no search is possible
        using (ISession session = OpenWriteSession())
        using (var transaction = session.BeginTransaction())
        {
            foreach (var untagged in unsearchableProteins)
            {
                session.SaveOrUpdate(untagged); // update the metadata
            }
            transaction.Commit();
        }
    }

    if (unsearchedProteins.Any())
    {
        int resultsCount = 0;
        int unsearchedCount = unsearchedProteins.Count;
        for (bool success = true; success;)
        {
            success = false; // Until we see at least one succeed this round
            var results = new List<DbProteinName>();

            // The "true" arg means "do just one batch then return"
            foreach (var result in fastaImporter.DoWebserviceLookup(unsearchedProteins, null, true))
            {
                if (result != null)
                {
                    if (!progressMonitor.Invoke(
                            string.Format(
                                Resources.ProteomeDb_LookupProteinMetadata_Retrieving_details_for__0__proteins,
                                unsearchedProteins.Count),
                            100 * resultsCount++ / unsearchedCount))
                    {
                        return false;
                    }
                    success = true;
                    results.Add(result.ProteinDbInfo);
                }
            }

            if (results.Any()) // save this batch
            {
                using (var session = OpenWriteSession())
                using (var transaction = session.BeginTransaction())
                {
                    foreach (var result in results)
                    {
                        session.SaveOrUpdate(result);
                    }
                    transaction.Commit();
                    session.Close();
                }
            }

            // Edit this list rather than rederive with database access.
            // A single RemoveAll pass replaces the former per-item Remove calls.
            unsearchedProteins.RemoveAll(p => !p.GetProteinMetadata().NeedsSearch());
        }
    }
    return true;
}
/// <summary>
/// Digests every stored protein with the given protease and records the resulting peptides
/// and peptide-to-protein links under the digestion named after the protease.
/// An existing digestion whose MaxSequenceLength already reaches MAX_SEQUENCE_LENGTH is
/// returned unchanged; otherwise it is extended, or a new digestion is created.
/// Peptides shorter than the digestion's MinSequenceLength are skipped and longer ones are
/// truncated to its MaxSequenceLength.
/// </summary>
/// <param name="protease">cleaves each protein; its Name identifies the digestion record</param>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return cancels the digest</param>
/// <returns>the digestion, or null if the progress monitor asked to cancel</returns>
public Digestion Digest(IProtease protease, ProgressMonitor progressMonitor)
{
    using (ISession session = OpenWriteSession())
    {
        DbDigestion dbDigestion = GetDbDigestion(protease.Name);
        HashSet<string> existingSequences = new HashSet<string>();
        using (var transaction = session.BeginTransaction())
        {
            if (dbDigestion != null)
            {
                if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                {
                    return new Digestion(this, dbDigestion);
                }
                if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                {
                    return null;
                }
                // Remember what the earlier digestion stored so those sequences are not inserted again.
                IQuery query = session.CreateQuery("SELECT P.Sequence FROM " // Not L10N
                                                   + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion") // Not L10N
                    .SetParameter("Digestion", dbDigestion); // Not L10N
                List<String> listSequences = new List<string>();
                query.List(listSequences);
                existingSequences.UnionWith(listSequences);
                dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                session.Update(dbDigestion);
            }
            else
            {
                dbDigestion = new DbDigestion
                {
                    Name = protease.Name,
                    MinSequenceLength = MIN_SEQUENCE_LENGTH,
                    MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                };
                session.Save(dbDigestion);
            }
            if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_proteins, 0))
            {
                return null;
            }

            List<DbProtein> proteins = new List<DbProtein>();
            session.CreateCriteria(typeof(DbProtein)).List(proteins);
            // Maps each truncated sequence inserted during this run to its row id.
            Dictionary<String, long> digestedPeptideIds = new Dictionary<string, long>();
            const String sqlPeptide =
                "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();"; // Not L10N
            const String sqlPeptideProtein =
                "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
            using (var commandPeptide = session.Connection.CreateCommand())
            using (var commandProtein = session.Connection.CreateCommand())
            {
                commandPeptide.CommandText = sqlPeptide;
                commandPeptide.Parameters.Add(new SQLiteParameter());
                commandPeptide.Parameters.Add(new SQLiteParameter());

                // Exactly one parameter per "?" placeholder: Peptide, Protein.
                commandProtein.CommandText = sqlPeptideProtein;
                commandProtein.Parameters.Add(new SQLiteParameter());
                commandProtein.Parameters.Add(new SQLiteParameter());

                // The message does not change between iterations, so format it once.
                string digestingMessage = string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins, proteins.Count);
                for (int i = 0; i < proteins.Count; i++)
                {
                    // Truncated sequences already linked to the current protein.
                    var proteinSequences = new HashSet<string>();
                    if (!progressMonitor.Invoke(digestingMessage, 100 * i / proteins.Count))
                    {
                        return null;
                    }
                    Protein protein = new Protein(ProteomeDbPath, proteins[i]);
                    foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                    {
                        if (digestedPeptide.Sequence.Length < dbDigestion.MinSequenceLength)
                        {
                            continue;
                        }
                        String truncatedSequence = digestedPeptide.Sequence.Substring(
                            0, Math.Min(digestedPeptide.Sequence.Length, dbDigestion.MaxSequenceLength));
                        if (existingSequences.Contains(truncatedSequence))
                        {
                            continue;
                        }
                        // HashSet.Add returns false for a repeat, so link each sequence once per protein.
                        if (!proteinSequences.Add(truncatedSequence))
                        {
                            continue;
                        }

                        long digestedPeptideId;
                        if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                        {
                            ((SQLiteParameter)commandPeptide.Parameters[0]).Value = dbDigestion.Id;
                            ((SQLiteParameter)commandPeptide.Parameters[1]).Value = truncatedSequence;
                            digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                            digestedPeptideIds.Add(truncatedSequence, digestedPeptideId);
                        }
                        ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptideId;
                        ((SQLiteParameter)commandProtein.Parameters[1]).Value = protein.Id;
                        commandProtein.ExecuteNonQuery();
                    }
                }
            }
            if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return null;
            }
            transaction.Commit();

            AnalyzeDb(session);
            progressMonitor.Invoke(
                string.Format(Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides,
                    proteins.Count, digestedPeptideIds.Count), 100);
        }
        return new Digestion(this, dbDigestion);
    }
}
/// <summary>
/// Imports the proteins parsed from a FASTA stream into the database.
/// A sequence that is already stored is not inserted again; for such a protein only names
/// not yet recorded are added, and they are never flagged as primary.
/// A failure while inserting a single name is written to the console and the import continues.
/// </summary>
/// <param name="reader">FASTA text; the position of its BaseStream is used to estimate progress</param>
/// <param name="progressMonitor">called with a status message and a percent-complete value; a false return abandons the import before the commit</param>
public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
{
    // Everything already stored, keyed by sequence, so repeats can be recognised.
    var knownProteins = new Dictionary<string, ProtIdNames>();
    using (ISession session = OpenWriteSession())
    {
        foreach (DbProtein stored in session.CreateCriteria(typeof(DbProtein)).List())
        {
            if (!stored.Id.HasValue)
            {
                continue;
            }
            knownProteins.Add(stored.Sequence, new ProtIdNames(stored.Id.Value, stored.Names));
        }

        int addedCount = 0;
        using (var transaction = session.BeginTransaction())
        using (IDbCommand insertProtein = session.Connection.CreateCommand())
        using (IDbCommand insertName = session.Connection.CreateCommand())
        {
            // just parse, no search for now
            var importer = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

            insertProtein.CommandText =
                "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
            insertProtein.Parameters.Add(new SQLiteParameter());

            insertName.CommandText =
                "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
            // One positional parameter per placeholder, in this order:
            // protein Id, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
            for (int iParam = 0; iParam < 9; iParam++)
            {
                insertName.Parameters.Add(new SQLiteParameter());
            }

            foreach (DbProtein parsed in importer.Import(reader))
            {
                int percent = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                string status = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, addedCount);
                if (!progressMonitor.Invoke(status, percent))
                {
                    return;
                }

                ProtIdNames idNames;
                bool alreadyStored = knownProteins.TryGetValue(parsed.Sequence, out idNames);
                if (!alreadyStored)
                {
                    // First sighting of this sequence: insert it and remember the new row id.
                    ((SQLiteParameter)insertProtein.Parameters[0]).Value = parsed.Sequence;
                    long newProteinId = Convert.ToInt64(insertProtein.ExecuteScalar());
                    idNames = new ProtIdNames(newProteinId, new DbProteinName[0]);
                    knownProteins.Add(parsed.Sequence, idNames);
                    addedCount++;
                }

                foreach (var parsedName in parsed.Names)
                {
                    // Skip any names that already exist
                    bool nameKnown = idNames.Names.Any(known => Equals(known.Name, parsedName.Name));
                    if (nameKnown)
                    {
                        continue;
                    }
                    try
                    {
                        object[] values =
                        {
                            idNames.Id,
                            parsedName.IsPrimary && !alreadyStored,
                            parsedName.Name,
                            parsedName.Description,
                            parsedName.PreferredName,
                            parsedName.Accession,
                            parsedName.Gene,
                            parsedName.Species,
                            parsedName.WebSearchStatus // represent as a string for ease of serialization
                        };
                        for (int iValue = 0; iValue < values.Length; iValue++)
                        {
                            ((SQLiteParameter)insertName.Parameters[iValue]).Value = values[iValue];
                        }
                        insertName.ExecuteNonQuery();
                    }
                    catch (Exception exception)
                    {
                        Console.Out.WriteLine(exception);
                    }
                }
            }

            if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
            {
                return;
            }
            transaction.Commit();
        }

        AnalyzeDb(session);
        string finished = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, addedCount);
        progressMonitor.Invoke(finished, 100);
    }
}