Exemplo n.º 1
0
        /// <summary>
        /// Imports protein records parsed from a FASTA stream. A sequence that is not yet known
        /// gets a new ProteomeDbProtein row; every name of a parsed record that is not already
        /// listed for its protein gets a new ProteomeDbProteinName row.
        /// </summary>
        /// <param name="reader">FASTA text to parse. The Position and Length of its base stream
        /// are used to estimate the percentage reported to <paramref name="progressMonitor"/>.</param>
        /// <param name="progressMonitor">Invoked with a status message and a percentage. When it
        /// returns false during the import, the method returns without committing the transaction.</param>
        public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
        {
            var idNamesBySequence = new Dictionary<string, ProtIdNames>();

            using (ISession session = OpenWriteSession())
            {
                // Index the proteins already stored (those that have an id) by sequence.
                foreach (DbProtein stored in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (!stored.Id.HasValue)
                    {
                        continue;
                    }
                    idNamesBySequence.Add(stored.Sequence, new ProtIdNames(stored.Id.Value, stored.Names));
                }

                int addedCount = 0;
                using (var transaction = session.BeginTransaction())
                using (IDbCommand proteinCommand = session.Connection.CreateCommand())
                using (IDbCommand nameCommand = session.Connection.CreateCommand())
                {
                    // just parse, no search for now
                    var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

                    // The protein insert takes the sequence as its only parameter and yields the new row id.
                    proteinCommand.CommandText =
                        "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
                    proteinCommand.Parameters.Add(new SQLiteParameter());
                    var sequenceParameter = (SQLiteParameter)proteinCommand.Parameters[0];

                    // One positional parameter per '?' placeholder, in column order:
                    // Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
                    nameCommand.CommandText =
                        "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
                    const int nameParameterCount = 9;
                    var nameParameters = new SQLiteParameter[nameParameterCount];
                    for (int i = 0; i < nameParameterCount; i++)
                    {
                        nameCommand.Parameters.Add(new SQLiteParameter());
                        nameParameters[i] = (SQLiteParameter)nameCommand.Parameters[i];
                    }

                    foreach (DbProtein parsed in fastaImporter.Import(reader))
                    {
                        // Report how far into the stream the reader is; the +1 keeps the divisor non-zero.
                        int percentComplete = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                        string addedMessage = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, addedCount);
                        if (!progressMonitor.Invoke(addedMessage, percentComplete))
                        {
                            return;
                        }

                        ProtIdNames idNames;
                        bool sequenceKnown = idNamesBySequence.TryGetValue(parsed.Sequence, out idNames);
                        if (!sequenceKnown)
                        {
                            // First time this sequence is seen: insert it and remember the generated id.
                            sequenceParameter.Value = parsed.Sequence;
                            long insertedId = Convert.ToInt64(proteinCommand.ExecuteScalar());
                            idNames = new ProtIdNames(insertedId, new DbProteinName[0]);
                            idNamesBySequence.Add(parsed.Sequence, idNames);
                            addedCount++;
                        }

                        foreach (var candidate in parsed.Names)
                        {
                            // Names already listed for this protein are not inserted again.
                            bool nameListed = idNames.Names.Any(listed => Equals(listed.Name, candidate.Name));
                            if (nameListed)
                            {
                                continue;
                            }

                            try
                            {
                                nameParameters[0].Value = idNames.Id;
                                // A name is only stored as primary when its protein was inserted just now.
                                nameParameters[1].Value = candidate.IsPrimary && !sequenceKnown;
                                nameParameters[2].Value = candidate.Name;
                                nameParameters[3].Value = candidate.Description;
                                nameParameters[4].Value = candidate.PreferredName;
                                nameParameters[5].Value = candidate.Accession;
                                nameParameters[6].Value = candidate.Gene;
                                nameParameters[7].Value = candidate.Species;
                                nameParameters[8].Value = candidate.WebSearchStatus; // represent as a string for ease of serialization
                                nameCommand.ExecuteNonQuery();
                            }
                            catch (Exception exception)
                            {
                                // A failed name insert is written to the console and the import carries on.
                                Console.Out.WriteLine(exception);
                            }
                        }
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return;
                    }
                    transaction.Commit();
                }

                AnalyzeDb(session);
                string finishedMessage = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, addedCount);
                progressMonitor.Invoke(finishedMessage, 100);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Imports protein records parsed from a FASTA stream using a stateless session. A sequence
        /// that is not yet known gets a new ProteomeDbProtein row; every name of a parsed record that
        /// is not already listed for its protein gets a new ProteomeDbProteinName row. When the
        /// subsequences table is present, DigestProteins is run before the transaction is committed.
        /// </summary>
        /// <param name="reader">FASTA text to parse. The Position and Length of its base stream
        /// are used to estimate the reported percentage.</param>
        /// <param name="progressMonitor">Receives progress updates and is polled for cancellation;
        /// on cancellation the method returns without committing the transaction.</param>
        /// <param name="status">Progress status handed by reference to
        /// UpdateProgressAndCheckForCancellation and DigestProteins.</param>
        /// <param name="delayAnalyzeDb">When true, the AnalyzeDb call after the commit is skipped.</param>
        /// <param name="duplicateSequenceCount">Number of parsed records whose sequence was already
        /// stored, or had already been added earlier in this same import.</param>
        public void AddFastaFile(StreamReader reader, IProgressMonitor progressMonitor, ref IProgressStatus status,
                                 bool delayAnalyzeDb, out int duplicateSequenceCount)
        {
            var idNamesBySequence = new Dictionary<string, ProtIdNames>();

            using (IStatelessSession session = SessionFactory.OpenStatelessSession()) // This is a long session, but there's no harm since db is useless till its done
            {
                // NOTE(review): the lookup is keyed by each name row's own Id but is queried below
                // with a protein Id - confirm this is the intended key.
                var storedNames = session.CreateCriteria<DbProteinName>().List<DbProteinName>();
                var namesByKey = storedNames.ToLookup(name => name.Id.Value);

                // Index the proteins already stored (those that have an id) by sequence.
                foreach (DbProtein stored in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (!stored.Id.HasValue)
                    {
                        continue;
                    }
                    idNamesBySequence.Add(stored.Sequence, new ProtIdNames(stored.Id.Value, namesByKey[stored.Id.Value].ToArray()));
                }

                int addedCount = 0;
                duplicateSequenceCount = 0;
                using (var transaction = session.BeginTransaction())
                using (IDbCommand proteinCommand = session.Connection.CreateCommand())
                using (IDbCommand nameCommand = session.Connection.CreateCommand())
                {
                    // just parse, no search for now
                    var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

                    // The protein insert takes the sequence as its only parameter and yields the new row id.
                    proteinCommand.CommandText =
                        "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
                    proteinCommand.Parameters.Add(new SQLiteParameter());
                    var sequenceParameter = (SQLiteParameter)proteinCommand.Parameters[0];

                    // One positional parameter per '?' placeholder, in column order:
                    // Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
                    nameCommand.CommandText =
                        "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
                    const int nameParameterCount = 9;
                    var nameParameters = new SQLiteParameter[nameParameterCount];
                    for (int i = 0; i < nameParameterCount; i++)
                    {
                        nameCommand.Parameters.Add(new SQLiteParameter());
                        nameParameters[i] = (SQLiteParameter)nameCommand.Parameters[i];
                    }

                    foreach (DbProtein parsed in fastaImporter.Import(reader))
                    {
                        // Report how far into the stream the reader is; the +1 keeps the divisor non-zero.
                        int percentComplete = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                        string addedMessage = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, addedCount);
                        if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, addedMessage, percentComplete))
                        {
                            return;
                        }

                        ProtIdNames idNames;
                        bool sequenceKnown = idNamesBySequence.TryGetValue(parsed.Sequence, out idNames);
                        if (sequenceKnown)
                        {
                            duplicateSequenceCount++;
                        }
                        else
                        {
                            // First time this sequence is seen: insert it and remember the generated id.
                            sequenceParameter.Value = parsed.Sequence;
                            long insertedId = Convert.ToInt64(proteinCommand.ExecuteScalar());
                            idNames = new ProtIdNames(insertedId, new DbProteinName[0]);
                            idNamesBySequence.Add(parsed.Sequence, idNames);
                            addedCount++;
                        }

                        foreach (var candidate in parsed.Names)
                        {
                            // Names already listed for this protein are not inserted again.
                            bool nameListed = idNames.Names.Any(listed => Equals(listed.Name, candidate.Name));
                            if (nameListed)
                            {
                                continue;
                            }

                            try
                            {
                                nameParameters[0].Value = idNames.Id;
                                // A name is only stored as primary when its protein was inserted just now.
                                nameParameters[1].Value = candidate.IsPrimary && !sequenceKnown;
                                nameParameters[2].Value = candidate.Name;
                                nameParameters[3].Value = candidate.Description;
                                nameParameters[4].Value = candidate.PreferredName;
                                nameParameters[5].Value = candidate.Accession;
                                nameParameters[6].Value = candidate.Gene;
                                nameParameters[7].Value = candidate.Species;
                                nameParameters[8].Value = candidate.WebSearchStatus; // represent as a string for ease of serialization
                                nameCommand.ExecuteNonQuery();
                            }
                            catch (Exception exception)
                            {
                                // A failed name insert is written to the console and the import carries on.
                                Console.Out.WriteLine(exception);
                            }
                        }
                    }

                    if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return;
                    }

                    if (HasSubsequencesTable(() => session.Connection))
                    {
                        DigestProteins(session.Connection, idNamesBySequence, progressMonitor, ref status);
                    }
                    // Check for cancellation once more, after the digest step, before committing.
                    if (progressMonitor.IsCanceled)
                    {
                        return;
                    }
                    transaction.Commit();
                }

                if (!delayAnalyzeDb)
                {
                    AnalyzeDb(session.Connection); // NB This runs asynchronously and may interfere with further writes
                }

                string finishedMessage = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, addedCount);
                UpdateProgressAndCheckForCancellation(progressMonitor, ref status, finishedMessage, 100);
            }
        }