Exemple #1
0
        /// <summary>
        /// Reads protein records from a FASTA stream and stores them in the database.
        /// Proteins are matched to existing rows by sequence: an unseen sequence gets a new
        /// protein row, and each name not already listed for the matched protein gets a new
        /// name row. Names added to a protein that already existed are never flagged as primary.
        /// </summary>
        /// <param name="reader">Reader over the FASTA text; the position of its base stream
        /// is used to compute the progress percentage.</param>
        /// <param name="progressMonitor">Called with a status message and a percentage. When it
        /// returns false the method returns immediately, before the transaction is committed.</param>
        public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
        {
            var idsBySequence = new Dictionary<string, ProtIdNames>();
            using (ISession session = OpenWriteSession())
            {
                // Index every stored protein that has an id by its sequence, so that sequences
                // appearing in the FASTA input can be matched against what is already there.
                foreach (DbProtein stored in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (!stored.Id.HasValue)
                        continue;
                    idsBySequence.Add(stored.Sequence, new ProtIdNames(stored.Id.Value, stored.Names));
                }

                int proteinCount = 0;
                using (var transaction = session.BeginTransaction())
                using (IDbCommand insertProtein = session.Connection.CreateCommand())
                using (IDbCommand insertName = session.Connection.CreateCommand())
                {
                    // The trailing select makes ExecuteScalar return the id of the inserted row.
                    insertProtein.CommandText =
                        "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
                    insertProtein.Parameters.Add(new SQLiteParameter());

                    insertName.CommandText =
                        "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
                    // One positional parameter per placeholder, in this order: protein id, IsPrimary,
                    // Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus.
                    const int nameParameterCount = 9;
                    for (int i = 0; i < nameParameterCount; i++)
                    {
                        insertName.Parameters.Add(new SQLiteParameter());
                    }

                    // just parse, no search for now
                    var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

                    foreach (DbProtein protein in fastaImporter.Import(reader))
                    {
                        // The +1 in the divisor keeps the division defined for a zero-length stream.
                        Stream fastaStream = reader.BaseStream;
                        int percentComplete = (int)(fastaStream.Position * 100 / (fastaStream.Length + 1));
                        string status = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, proteinCount);
                        if (!progressMonitor.Invoke(status, percentComplete))
                            return;

                        ProtIdNames idAndNames;
                        bool alreadyStored = idsBySequence.TryGetValue(protein.Sequence, out idAndNames);
                        if (!alreadyStored)
                        {
                            // New sequence: insert it and remember its id with an empty list of known names.
                            ((SQLiteParameter)insertProtein.Parameters[0]).Value = protein.Sequence;
                            long newId = Convert.ToInt64(insertProtein.ExecuteScalar());
                            idAndNames = new ProtIdNames(newId, new DbProteinName[0]);
                            idsBySequence.Add(protein.Sequence, idAndNames);
                            proteinCount++;
                        }

                        foreach (var candidate in protein.Names)
                        {
                            // Skip any names that already exist
                            bool nameAlreadyKnown = idAndNames.Names.Any(known => Equals(known.Name, candidate.Name));
                            if (nameAlreadyKnown)
                                continue;

                            try
                            {
                                // Values are listed in the same order as the positional parameters above.
                                object[] values =
                                {
                                    idAndNames.Id,
                                    candidate.IsPrimary && !alreadyStored,
                                    candidate.Name,
                                    candidate.Description,
                                    candidate.PreferredName,
                                    candidate.Accession,
                                    candidate.Gene,
                                    candidate.Species,
                                    candidate.WebSearchStatus // represent as a string for ease of serialization
                                };
                                for (int i = 0; i < values.Length; i++)
                                {
                                    ((SQLiteParameter)insertName.Parameters[i]).Value = values[i];
                                }
                                insertName.ExecuteNonQuery();
                            }
                            catch (Exception exception)
                            {
                                // Best effort: a failed name insert is written to the console and the import continues.
                                Console.Out.WriteLine(exception);
                            }
                        }
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                        return;

                    transaction.Commit();
                }

                AnalyzeDb(session);
                string finished = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, proteinCount);
                progressMonitor.Invoke(finished, 100);
            }
        }
        /// <summary>
        /// Reads every file listed in FastaFilePaths, merges the proteins found in them by
        /// sequence (collecting each name/description pair), hands the merged set to
        /// UpdatePeptides, and then schedules Close via BeginInvoke unless already disposed.
        /// Returns early, skipping UpdatePeptides and Close, when UpdateProgress returns false.
        /// In every case the method clears _running and pulses all threads waiting on this object.
        /// </summary>
        private void DoWork()
        {
            try
            {
                // just do the basic name+description parsing, no regex or web access - we don't use extended metadata here
                var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.FakeWebSearchProvider());
                var proteins = new Dictionary<string, ProteinData>();
                foreach (var path in FastaFilePaths)
                {
                    var statusText = "Reading FASTA file " + Path.GetFileName(path);
                    var fileInfo = new FileInfo(path);
                    // Import from the same reader whose stream position is reported as progress,
                    // and dispose it when done. Opening a second reader for the importer left this
                    // one at position 0 (progress never advanced) and leaked both file handles.
                    using (var reader = File.OpenText(path))
                    {
                        foreach (var protein in fastaImporter.Import(reader))
                        {
                            if (!UpdateProgress(statusText, (int) (reader.BaseStream.Position * 100 / fileInfo.Length)))
                            {
                                return;
                            }
                            // Proteins with identical sequences share a single ProteinData entry.
                            ProteinData proteinData;
                            if (!proteins.TryGetValue(protein.Sequence, out proteinData))
                            {
                                proteinData = new ProteinData(protein.Sequence);
                                proteins.Add(protein.Sequence, proteinData);
                            }
                            foreach (var name in protein.Names)
                            {
                                proteinData.AddName(name.Name, name.Description);
                            }
                        }
                    }
                }
                UpdatePeptides(proteins);

                if (!IsDisposed)
                {
                    BeginInvoke(new Action(Close));
                }
            }
            finally
            {
                // NOTE(review): the lock object is left as `this` because code outside this
                // method presumably waits on the same monitor - confirm before changing it.
                lock(this)
                {
                    _running = false;
                    Monitor.PulseAll(this);
                }
            }
        }