Esempio n. 1
0
        /// <summary>
        /// Access the web to resolve protein metadata not directly found in fasta file.
        /// The fasta text importer will have left search hints in ProteinMetadata.
        /// </summary>
        /// <remarks>
        /// Runs in three steps: (1) read every protein name and collect those with a pending search
        /// term plus those whose WebSearchInfo is still empty, (2) write back the formerly empty
        /// ones that turn out not to need a search, (3) repeatedly request one batch of lookups from
        /// <paramref name="fastaImporter"/>, saving each batch, until a round yields no result.
        /// </remarks>
        /// <param name="progressMonitor">progress callback; when it returns false this method stops and returns false</param>
        /// <param name="fastaImporter">object that accesses the web, or pretends to if in a test</param>
        /// <param name="polite">if true, don't try to resolve everything in one go, assume we can come back later.
        /// NOTE(review): this flag is not read anywhere in this method body - confirm the intended behavior.</param>
        /// <returns>true on success, false if the progress monitor asked to stop</returns>
        public bool LookupProteinMetadata(ProgressMonitor progressMonitor, WebEnabledFastaImporter fastaImporter, bool polite = false)
        {
            var unsearchedProteins = new List<ProteinSearchInfo>();
            List<DbProteinName> untaggedProteins;

            using (ISession session = OpenSession())
            {
                if (!progressMonitor.Invoke(Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
                {
                    return false;
                }

                // get a list of proteins with unresolved metadata websearches
                var proteinNames = session.CreateCriteria(typeof(DbProteinName)).List<DbProteinName>();
                var proteinsToSearch =
                    proteinNames.Where(proteinName => proteinName.GetProteinMetadata().GetPendingSearchTerm().Length > 0)
                        .ToList();
                // and a list of proteins which have never been considered for metadata search
                untaggedProteins =
                    proteinNames.Where(proteinName => proteinName.WebSearchInfo.IsEmpty()).ToList();

                foreach (var untaggedProtein in untaggedProteins)
                {
                    untaggedProtein.SetWebSearchCompleted(); // by default take this out of consideration for next time
                    var metadata = untaggedProtein.GetProteinMetadata();
                    if (metadata.HasMissingMetadata())
                    {
                        var search = fastaImporter.ParseProteinMetaData(metadata);
                        if (search != null)
                        {
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.Merge(search)); // don't stomp name by accident
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.ChangeWebSearchInfo(search.WebSearchInfo));
                        }
                    }
                    if (metadata.NeedsSearch())
                    {
                        proteinsToSearch.Add(untaggedProtein); // add to the list of things to commit back to the db
                    }
                }

                // Get the lengths of the sequences without getting the sequences themselves, for best speed
                var proteinIds = proteinsToSearch.Select(name => name.Protein.Id.Value).Distinct().ToArray();
                var proteinLengths = new Dictionary<long, int>();
                using (var cmd = session.Connection.CreateCommand())
                {
                    string sql = "SELECT Id, LENGTH(Sequence) AS SequenceLength FROM ProteomeDbProtein P"; // Not L10N
                    // Only restrict by id when the list is short; otherwise read the length of every protein
                    if (proteinIds.Length < 1000)
                    {
                        sql += " WHERE P.Id IN (" +                // Not L10N
                               string.Join(",", proteinIds) + ")"; // Not L10N
                    }
                    cmd.CommandText = sql;
                    using (var reader = cmd.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            var id = reader.GetValue(0);
                            var len = reader.GetValue(1);
                            proteinLengths.Add(Convert.ToInt64(id), Convert.ToInt32(len));
                        }
                    }
                }
                foreach (var p in proteinsToSearch)
                {
                    // A missing entry leaves length at its default of 0
                    int length;
                    proteinLengths.TryGetValue(p.Protein.Id.GetValueOrDefault(), out length);
                    unsearchedProteins.Add(new ProteinSearchInfo(p, length));
                }
            }

            // Write back the ones that were formerly without search terms, but which now indicate no search is possible.
            // The filter is evaluated once here instead of once for the emptiness check and again for the save loop.
            var resolvedUntagged =
                untaggedProteins.Where(untagged => !untagged.GetProteinMetadata().NeedsSearch()).ToList();
            if (resolvedUntagged.Count > 0) // did any get set as unsearchable?
            {
                using (ISession session = OpenWriteSession())
                {
                    using (var transaction = session.BeginTransaction())
                    {
                        foreach (var untagged in resolvedUntagged)
                        {
                            session.SaveOrUpdate(untagged); // update the metadata
                        }
                        transaction.Commit();
                    }
                }
            }

            if (unsearchedProteins.Any())
            {
                int resultsCount = 0;
                int unsearchedCount = unsearchedProteins.Count; // fixed denominator for the progress percentage
                bool success;
                do
                {
                    success = false; // Until we see at least one succeed this round
                    var results = new List<DbProteinName>();

                    // The "true" arg means "do just one batch then return"
                    foreach (var result in fastaImporter.DoWebserviceLookup(unsearchedProteins, null, true))
                    {
                        if (result != null)
                        {
                            if (
                                !progressMonitor.Invoke(
                                    string.Format(
                                        Resources.ProteomeDb_LookupProteinMetadata_Retrieving_details_for__0__proteins,
                                        unsearchedProteins.Count), 100 * resultsCount++ / unsearchedCount))
                            {
                                return false;
                            }
                            success = true;
                            results.Add(result.ProteinDbInfo);
                        }
                    }
                    if (results.Any()) // save this batch
                    {
                        using (var session = OpenWriteSession())
                        {
                            using (var transaction = session.BeginTransaction())
                            {
                                foreach (var result in results)
                                {
                                    session.SaveOrUpdate(result);
                                }
                                transaction.Commit();
                                session.Close();
                            }
                        }
                    }
                    // Edit this list rather than rederive with database access.
                    // A single RemoveAll pass replaces one linear List.Remove scan per resolved entry.
                    unsearchedProteins.RemoveAll(p => !p.GetProteinMetadata().NeedsSearch());
                } while (success);
            }
            return true;
        }
Esempio n. 2
0
        /// <summary>
        /// Digests every protein of this organism with the given protease and stores the resulting
        /// peptides, and their peptide-to-protein links, under a newly created digestion record.
        /// </summary>
        /// <param name="protease">enzyme used to cut each protein; also supplies MaxMissedCleavages for the new record</param>
        /// <param name="name">name stored on the new digestion record</param>
        /// <param name="description">description stored on the new digestion record</param>
        /// <param name="progressMonitor">progress callback; when it returns false the method returns null without committing</param>
        /// <returns>the new digestion, or null if the progress monitor asked to stop</returns>
        public Digestion Digest(IProtease protease, String name, String description, ProgressMonitor progressMonitor)
        {
            using (ISession session = ProteomeDb.OpenWriteSession())
            {
                DbOrganism organism = GetEntity(session);
                // The transaction is scoped with using so every early return below disposes it uncommitted
                using (var transaction = session.BeginTransaction())
                {
                    var digestion = new DbDigestion
                    {
                        Name = name,
                        Description = description,
                        Organism = organism,
                        MaxMissedCleavages = protease.MaxMissedCleavages
                    };
                    session.Save(digestion);
                    if (!progressMonitor.Invoke("Listing proteins", 0))
                    {
                        return null;
                    }
                    var proteins = new List<DbProtein>(organism.Proteins);
                    // Maps a peptide sequence to the row id of its already inserted peptide record
                    var digestedPeptideIds = new Dictionary<string, long>();
                    const String sqlPeptide =
                        "INSERT INTO ProteomeDbDigestedPeptide (Digestion, MissedCleavages, Sequence, Version) VALUES(@Digestion,@MissedCleavages,@Sequence,1);select last_insert_rowid();";
                    // NOTE(review): this statement uses positional '?' placeholders while its parameters
                    // carry names; values are assigned by collection index below.
                    const String sqlPeptideProtein =
                        "INSERT INTO ProteomeDbDigestedPeptideProtein (StartIndex, Peptide, Protein, Version) VALUES(?,?,?,1);";
                    // Both commands are disposed as soon as the insert loop finishes or is abandoned
                    using (var commandPeptide = session.Connection.CreateCommand())
                    using (var commandProtein = session.Connection.CreateCommand())
                    {
                        commandPeptide.CommandText = sqlPeptide;
                        commandPeptide.Parameters.Add(new SQLiteParameter("@Digestion"));
                        commandPeptide.Parameters.Add(new SQLiteParameter("@MissedCleavages"));
                        commandPeptide.Parameters.Add(new SQLiteParameter("@Sequence"));
                        commandProtein.CommandText = sqlPeptideProtein;
                        commandProtein.Parameters.Add(new SQLiteParameter("@StartIndex"));
                        commandProtein.Parameters.Add(new SQLiteParameter("@Peptide"));
                        commandProtein.Parameters.Add(new SQLiteParameter("@Protein"));
                        for (int i = 0; i < proteins.Count; i++)
                        {
                            if (!progressMonitor.Invoke("Digesting " + proteins.Count
                                                        + " proteins", 100 * i / proteins.Count))
                            {
                                return null;
                            }
                            Protein protein = new Protein(this, proteins[i]);
                            foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                            {
                                if (digestedPeptide.Sequence.Length > MAX_PEPTIDE_LENGTH)
                                {
                                    continue;
                                }
                                // Insert the peptide row only the first time this sequence is seen
                                long digestedPeptideId;
                                if (!digestedPeptideIds.TryGetValue(digestedPeptide.Sequence, out digestedPeptideId))
                                {
                                    ((SQLiteParameter)commandPeptide.Parameters[0]).Value = digestion.Id;
                                    ((SQLiteParameter)commandPeptide.Parameters[1]).Value = digestedPeptide.MissedCleavages;
                                    ((SQLiteParameter)commandPeptide.Parameters[2]).Value = digestedPeptide.Sequence;
                                    digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                                    digestedPeptideIds.Add(digestedPeptide.Sequence, digestedPeptideId);
                                }
                                // Always record where this peptide occurs in the current protein
                                ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptide.Index;
                                ((SQLiteParameter)commandProtein.Parameters[1]).Value = digestedPeptideId;
                                ((SQLiteParameter)commandProtein.Parameters[2]).Value = proteins[i].Id;
                                commandProtein.ExecuteNonQuery();
                            }
                        }
                    }
                    if (!progressMonitor.Invoke("Committing transaction", 99))
                    {
                        return null;
                    }
                    transaction.Commit();
                    progressMonitor.Invoke(
                        "Digested " + proteins.Count + " proteins into " + digestedPeptideIds.Count + " unique peptides",
                        100);
                    return new Digestion(this, digestion);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Digests every protein in the database with the given protease and stores the peptides
        /// under the digestion record named after the protease, creating that record if none exists.
        /// </summary>
        /// <remarks>
        /// An existing digestion whose MaxSequenceLength is already at least MAX_SEQUENCE_LENGTH is
        /// returned unchanged. Otherwise its stored peptide sequences are loaded and skipped during
        /// this pass, and its MaxSequenceLength is raised to MAX_SEQUENCE_LENGTH.
        /// Peptides shorter than the digestion's MinSequenceLength are ignored; longer ones are
        /// truncated to MaxSequenceLength before being stored.
        /// </remarks>
        /// <param name="protease">enzyme used to cut each protein; its Name identifies the digestion record</param>
        /// <param name="progressMonitor">progress callback; when it returns false the method returns null without committing</param>
        /// <returns>the digestion, or null if the progress monitor asked to stop</returns>
        public Digestion Digest(IProtease protease, ProgressMonitor progressMonitor)
        {
            using (ISession session = OpenWriteSession())
            {
                DbDigestion dbDigestion = GetDbDigestion(protease.Name);
                HashSet<string> existingSequences = new HashSet<string>();
                using (var transaction = session.BeginTransaction())
                {
                    if (dbDigestion != null)
                    {
                        if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                        {
                            return new Digestion(this, dbDigestion);
                        }
                        if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                        {
                            return null;
                        }
                        IQuery query = session.CreateQuery("SELECT P.Sequence FROM "                                          // Not L10N
                                                           + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion") // Not L10N
                                       .SetParameter("Digestion", dbDigestion);                                               // Not L10N
                        List<String> listSequences = new List<string>();
                        query.List(listSequences);
                        existingSequences.UnionWith(listSequences);
                        dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                        session.Update(dbDigestion);
                    }
                    else
                    {
                        dbDigestion = new DbDigestion
                        {
                            Name = protease.Name,
                            MinSequenceLength = MIN_SEQUENCE_LENGTH,
                            MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                        };
                        session.Save(dbDigestion);
                    }
                    if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_proteins, 0))
                    {
                        return null;
                    }
                    List<DbProtein> proteins = new List<DbProtein>();
                    session.CreateCriteria(typeof(DbProtein)).List(proteins);
                    // Maps a truncated peptide sequence to the row id of the peptide inserted during this pass
                    Dictionary<String, long> digestedPeptideIds = new Dictionary<string, long>();
                    const String sqlPeptide =
                        "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();"; // Not L10N
                    const String sqlPeptideProtein =
                        "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
                    using (var commandPeptide = session.Connection.CreateCommand())
                    using (var commandProtein = session.Connection.CreateCommand())
                    {
                        commandPeptide.CommandText = sqlPeptide;
                        commandPeptide.Parameters.Add(new SQLiteParameter());
                        commandPeptide.Parameters.Add(new SQLiteParameter());
                        // Exactly one parameter per '?' placeholder in the statement
                        commandProtein.CommandText = sqlPeptideProtein;
                        commandProtein.Parameters.Add(new SQLiteParameter());
                        commandProtein.Parameters.Add(new SQLiteParameter());
                        for (int i = 0; i < proteins.Count; i++)
                        {
                            // Truncated sequences already linked to the current protein
                            var proteinSequences = new HashSet<string>();
                            if (!progressMonitor.Invoke(string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins, proteins.Count), 100 * i / proteins.Count))
                            {
                                return null;
                            }
                            Protein protein = new Protein(ProteomeDbPath, proteins[i]);

                            foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                            {
                                if (digestedPeptide.Sequence.Length < dbDigestion.MinSequenceLength)
                                {
                                    continue;
                                }
                                String truncatedSequence = digestedPeptide.Sequence.Substring(
                                    0, Math.Min(digestedPeptide.Sequence.Length, dbDigestion.MaxSequenceLength));
                                // Stored by an earlier digestion pass: nothing to add
                                if (existingSequences.Contains(truncatedSequence))
                                {
                                    continue;
                                }
                                // Add returns false when this protein already produced the same truncated sequence
                                if (!proteinSequences.Add(truncatedSequence))
                                {
                                    continue;
                                }
                                long digestedPeptideId;
                                if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                                {
                                    ((SQLiteParameter)commandPeptide.Parameters[0]).Value = dbDigestion.Id;
                                    ((SQLiteParameter)commandPeptide.Parameters[1]).Value = truncatedSequence;
                                    digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                                    digestedPeptideIds.Add(truncatedSequence, digestedPeptideId);
                                }
                                ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptideId;
                                ((SQLiteParameter)commandProtein.Parameters[1]).Value = protein.Id;
                                commandProtein.ExecuteNonQuery();
                            }
                        }
                    }
                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return null;
                    }
                    transaction.Commit();

                    AnalyzeDb(session);
                    progressMonitor.Invoke(
                        string.Format(Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides,
                                      proteins.Count, digestedPeptideIds.Count),
                        100);
                }
                return new Digestion(this, dbDigestion);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Imports the proteins parsed from a FASTA stream into the database.
        /// </summary>
        /// <remarks>
        /// Proteins are matched to existing rows by exact sequence. A sequence not seen before gets a
        /// new protein row; a known sequence only gains the names it does not already have.
        /// A failing name insert is written to Console.Out and the import carries on.
        /// If the progress monitor returns false the method returns before the transaction is committed.
        /// </remarks>
        /// <param name="reader">FASTA text to import; its base stream position drives the progress percentage</param>
        /// <param name="progressMonitor">progress callback; a false return stops the import</param>
        public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
        {
            // Known proteins keyed by sequence: database id plus the names already stored for it
            var knownProteins = new Dictionary<string, ProtIdNames>();

            using (ISession session = OpenWriteSession())
            {
                foreach (DbProtein storedProtein in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (!storedProtein.Id.HasValue)
                    {
                        continue;
                    }
                    knownProteins.Add(storedProtein.Sequence, new ProtIdNames(storedProtein.Id.Value, storedProtein.Names));
                }

                int addedCount = 0;
                using (var transaction = session.BeginTransaction())
                using (IDbCommand insertProtein = session.Connection.CreateCommand())
                using (IDbCommand insertName = session.Connection.CreateCommand())
                {
                    // just parse, no search for now
                    var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

                    insertProtein.CommandText =
                        "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
                    insertProtein.Parameters.Add(new SQLiteParameter());

                    insertName.CommandText =
                        "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
                    // One positional parameter per '?', in column order:
                    // Id, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchInfo
                    for (int slot = 0; slot < 9; slot++)
                    {
                        insertName.Parameters.Add(new SQLiteParameter());
                    }

                    foreach (DbProtein imported in fastaImporter.Import(reader))
                    {
                        var fastaStream = reader.BaseStream;
                        // The +1 keeps the divisor non-zero for an empty stream
                        int percentDone = (int)(fastaStream.Position * 100 / (fastaStream.Length + 1));
                        string progressText = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, addedCount);
                        if (!progressMonitor.Invoke(progressText, percentDone))
                        {
                            return;
                        }

                        ProtIdNames idAndNames;
                        bool alreadyStored = knownProteins.TryGetValue(imported.Sequence, out idAndNames);
                        if (!alreadyStored)
                        {
                            // New sequence: insert it and remember the generated row id
                            ((SQLiteParameter)insertProtein.Parameters[0]).Value = imported.Sequence;
                            long newProteinId = Convert.ToInt64(insertProtein.ExecuteScalar());
                            idAndNames = new ProtIdNames(newProteinId, new DbProteinName[0]);
                            knownProteins.Add(imported.Sequence, idAndNames);
                            addedCount++;
                        }

                        foreach (var importedName in imported.Names)
                        {
                            // Skip any names that already exist
                            bool nameKnown = idAndNames.Names.Any(stored => Equals(stored.Name, importedName.Name));
                            if (nameKnown)
                            {
                                continue;
                            }

                            try
                            {
                                var slots = insertName.Parameters;
                                ((SQLiteParameter)slots[0]).Value = idAndNames.Id;
                                // A name is only primary when its protein row was created by this import
                                ((SQLiteParameter)slots[1]).Value = importedName.IsPrimary && !alreadyStored;
                                ((SQLiteParameter)slots[2]).Value = importedName.Name;
                                ((SQLiteParameter)slots[3]).Value = importedName.Description;
                                ((SQLiteParameter)slots[4]).Value = importedName.PreferredName;
                                ((SQLiteParameter)slots[5]).Value = importedName.Accession;
                                ((SQLiteParameter)slots[6]).Value = importedName.Gene;
                                ((SQLiteParameter)slots[7]).Value = importedName.Species;
                                ((SQLiteParameter)slots[8]).Value = importedName.WebSearchStatus; // represent as a string for ease of serialization
                                insertName.ExecuteNonQuery();
                            }
                            catch (Exception exception)
                            {
                                // Best effort: report the failure and continue with the next name
                                Console.Out.WriteLine(exception);
                            }
                        }
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return;
                    }
                    transaction.Commit();
                }

                AnalyzeDb(session);
                progressMonitor.Invoke(
                    string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, addedCount), 100);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Access the web to resolve protein metadata not directly found in fasta file.
        /// The fasta text importer will have left search hints in ProteinMetadata.
        /// </summary>
        /// <remarks>
        /// Runs in three steps: (1) read every protein name and collect those with a pending search
        /// term plus those whose WebSearchInfo is still empty, (2) write back the formerly empty
        /// ones that turn out not to need a search, (3) repeatedly request one batch of lookups from
        /// <paramref name="fastaImporter"/>, saving each batch, until a round yields no result.
        /// </remarks>
        /// <param name="progressMonitor">progress callback; when it returns false this method stops and returns false</param>
        /// <param name="fastaImporter">object that accesses the web, or pretends to if in a test</param>
        /// <param name="polite">if true, don't try to resolve everything in one go, assume we can come back later.
        /// NOTE(review): this flag is not read anywhere in this method body - confirm the intended behavior.</param>
        /// <returns>true on success, false if the progress monitor asked to stop</returns>
        public bool LookupProteinMetadata(ProgressMonitor progressMonitor, WebEnabledFastaImporter fastaImporter, bool polite = false)
        {
            var unsearchedProteins = new List<ProteinSearchInfo>();
            List<DbProteinName> untaggedProteins;
            using (ISession session = OpenSession())
            {
                if (!progressMonitor.Invoke(Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
                {
                    return false;
                }

                // get a list of proteins with unresolved metadata websearches
                var proteinNames = session.CreateCriteria(typeof (DbProteinName)).List<DbProteinName>();
                var proteinsToSearch =
                    proteinNames.Where(proteinName => (proteinName.GetProteinMetadata().GetPendingSearchTerm().Length > 0))
                        .ToList();
                // and a list of proteins which have never been considered for metadata search
                untaggedProteins =
                    proteinNames.Where(proteinName => proteinName.WebSearchInfo.IsEmpty()).ToList();

                foreach (var untaggedProtein in untaggedProteins)
                {
                    untaggedProtein.SetWebSearchCompleted(); // by default take this out of consideration for next time
                    var metadata = untaggedProtein.GetProteinMetadata();
                    if (metadata.HasMissingMetadata())
                    {
                        var search = fastaImporter.ParseProteinMetaData(metadata);
                        if (search != null)
                        {
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.Merge(search)); // don't stomp name by accident
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.ChangeWebSearchInfo(search.WebSearchInfo));
                        }
                    }
                    if (metadata.NeedsSearch())
                    {
                        proteinsToSearch.Add(untaggedProtein); // add to the list of things to commit back to the db
                    }
                }
                // Get the lengths of the sequences without getting the sequences themselves, for best speed
                var proteinIds = proteinsToSearch.Select(name => name.Protein.Id.Value).Distinct().ToArray();
                var proteinLengths = new Dictionary<long, int>();
                using (var cmd = session.Connection.CreateCommand())
                {
                    string sql = "SELECT Id, LENGTH(Sequence) AS SequenceLength FROM ProteomeDbProtein P"; // Not L10N
                    // Only restrict by id when the list is short; otherwise read the length of every protein
                    if (proteinIds.Length < 1000)
                    {
                        sql += " WHERE P.Id IN (" + // Not L10N
                        string.Join(",", proteinIds) + ")"; // Not L10N
                    }
                    cmd.CommandText = sql;
                    using (var reader = cmd.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            var id = reader.GetValue(0);
                            var len = reader.GetValue(1);
                            proteinLengths.Add(Convert.ToInt64(id), Convert.ToInt32(len));
                        }
                    }
                }
                foreach (var p in proteinsToSearch)
                {
                    // A missing entry leaves length at its default of 0
                    int length;
                    proteinLengths.TryGetValue(p.Protein.Id.GetValueOrDefault(), out length);
                    unsearchedProteins.Add(new ProteinSearchInfo(p, length));
                }
            }

            // Write back the ones that were formerly without search terms, but which now indicate no search is possible.
            // The filter is evaluated once here instead of once for the emptiness check and again for the save loop.
            var resolvedUntagged =
                untaggedProteins.Where(untagged => !untagged.GetProteinMetadata().NeedsSearch()).ToList();
            if (resolvedUntagged.Count > 0) // did any get set as unsearchable?
            {
                using (ISession session = OpenWriteSession())
                {
                    using (var transaction = session.BeginTransaction())
                    {
                        foreach (var untagged in resolvedUntagged)
                        {
                            session.SaveOrUpdate(untagged); // update the metadata
                        }
                        transaction.Commit();
                    }
                }
            }

            if (unsearchedProteins.Any())
            {
                int resultsCount = 0;
                int unsearchedCount = unsearchedProteins.Count; // fixed denominator for the progress percentage
                bool success;
                do
                {
                    success = false; // Until we see at least one succeed this round
                    var results = new List<DbProteinName>();

                    // The "true" arg means "do just one batch then return"
                    foreach (var result in fastaImporter.DoWebserviceLookup(unsearchedProteins, null, true))
                    {
                        if (result != null)
                        {
                            if (
                            !progressMonitor.Invoke(
                                string.Format(
                                    Resources.ProteomeDb_LookupProteinMetadata_Retrieving_details_for__0__proteins,
                                    unsearchedProteins.Count), 100 * resultsCount++ / unsearchedCount))
                            {
                                return false;
                            }
                            success = true;
                            results.Add(result.ProteinDbInfo);
                        }
                    }
                    if (results.Any()) // save this batch
                    {
                        using (var session = OpenWriteSession())
                        {
                            using (var transaction = session.BeginTransaction())
                            {
                                foreach (var result in results)
                                {
                                    session.SaveOrUpdate(result);
                                }
                                transaction.Commit();
                                session.Close();
                            }
                        }
                    }
                    // Edit this list rather than rederive with database access.
                    // A single RemoveAll pass replaces one linear List.Remove scan per resolved entry.
                    unsearchedProteins.RemoveAll(p => !p.GetProteinMetadata().NeedsSearch());
                } while (success);
            }
            return true;
        }
Esempio n. 6
0
        /// <summary>
        /// Digests every protein in the database with the given protease and stores the
        /// resulting peptides, plus the peptide-to-protein links, under a digestion record
        /// named after the protease.
        /// </summary>
        /// <param name="protease">Protease used to cut each protein; its Name identifies the stored digestion.</param>
        /// <param name="progressMonitor">Progress callback; when it returns false the operation stops and null is returned.</param>
        /// <returns>
        /// The digestion for this protease, or null if the progress monitor cancelled.
        /// Cancellation returns before the transaction is committed.
        /// </returns>
        public Digestion Digest(IProtease protease, ProgressMonitor progressMonitor)
        {
            using (ISession session = OpenWriteSession())
            {
                DbDigestion dbDigestion = GetDbDigestion(protease.Name);
                // Sequences already stored for this digestion before this run; these are skipped below
                HashSet<string> existingSequences = new HashSet<string>();
                using (var transaction = session.BeginTransaction())
                {
                    if (dbDigestion != null)
                    {
                        if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                        {
                            // Stored digestion already covers the current maximum length: nothing to do
                            return new Digestion(this, dbDigestion);
                        }
                        if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                        {
                            return null;
                        }
                        IQuery query = session.CreateQuery("SELECT P.Sequence FROM " // Not L10N
                                                           + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion") // Not L10N
                            .SetParameter("Digestion", dbDigestion); // Not L10N
                        List<String> listSequences = new List<string>();
                        query.List(listSequences);
                        existingSequences.UnionWith(listSequences);
                        dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                        session.Update(dbDigestion);
                    }
                    else
                    {
                        dbDigestion = new DbDigestion
                        {
                            Name = protease.Name,
                            MinSequenceLength = MIN_SEQUENCE_LENGTH,
                            MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                        };
                        session.Save(dbDigestion);
                    }
                    if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_proteins, 0)) 
                    {
                        return null;
                    }
                    List<DbProtein> proteins = new List<DbProtein>();
                    session.CreateCriteria(typeof(DbProtein)).List(proteins);
                    // Row ids of peptides inserted during this run, keyed by (truncated) sequence
                    Dictionary<String, long> digestedPeptideIds
                        = new Dictionary<string, long>();
                    // The trailing SELECT makes ExecuteScalar return the id of the row just inserted
                    const String sqlPeptide =
                            "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();"; // Not L10N
                    using (var commandPeptide = session.Connection.CreateCommand())
                    using (var commandProtein = session.Connection.CreateCommand())
                    {
                        commandPeptide.CommandText = sqlPeptide;
                        commandPeptide.Parameters.Add(new SQLiteParameter());
                        commandPeptide.Parameters.Add(new SQLiteParameter());
                        const String sqlPeptideProtein =
                            "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
                        commandProtein.CommandText = sqlPeptideProtein;
                        // Exactly one parameter per "?" placeholder: Peptide, Protein
                        commandProtein.Parameters.Add(new SQLiteParameter());
                        commandProtein.Parameters.Add(new SQLiteParameter());
                        for (int i = 0; i < proteins.Count; i++)
                        {
                            // Sequences already linked to this protein, so each link is written once
                            var proteinSequences = new HashSet<string>();
                            if (!progressMonitor.Invoke(string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins,proteins.Count), 100 * i / proteins.Count))
                            {
                                return null;
                            }
                            Protein protein = new Protein(ProteomeDbPath, proteins[i]);

                            foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                            {
                                if (digestedPeptide.Sequence.Length < dbDigestion.MinSequenceLength)
                                {
                                    continue;
                                }
                                // Peptides longer than the maximum are stored as their leading MaxSequenceLength characters
                                String truncatedSequence = digestedPeptide.Sequence.Substring(
                                    0, Math.Min(digestedPeptide.Sequence.Length, dbDigestion.MaxSequenceLength));
                                if (existingSequences.Contains(truncatedSequence))
                                {
                                    continue;
                                }
                                // Add returns false when this protein has already produced the sequence
                                if (!proteinSequences.Add(truncatedSequence))
                                {
                                    continue;
                                }
                                long digestedPeptideId;
                                if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                                {
                                    ((SQLiteParameter)commandPeptide.Parameters[0]).Value = dbDigestion.Id;
                                    ((SQLiteParameter)commandPeptide.Parameters[1]).Value = truncatedSequence;
                                    digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                                    digestedPeptideIds.Add(truncatedSequence, digestedPeptideId);
                                }
                                ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptideId;
                                ((SQLiteParameter)commandProtein.Parameters[1]).Value = protein.Id;
                                commandProtein.ExecuteNonQuery();
                            }
                        }
                    }
                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return null;
                    }
                    transaction.Commit();

                    AnalyzeDb(session);
                    progressMonitor.Invoke(
                        string.Format(Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides,
                                      proteins.Count, digestedPeptideIds.Count),
                        100);
                }
                return new Digestion(this, dbDigestion);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Imports the proteins parsed from a FASTA stream into the database. A protein row is
        /// inserted for each sequence not already stored, and a name row for each name not
        /// already listed for that protein.
        /// </summary>
        /// <param name="reader">Reader over the FASTA text; its base stream position and length drive the progress percentage.</param>
        /// <param name="progressMonitor">Progress callback; when it returns false the method returns before committing.</param>
        public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
        {
            // Sequence -> (row id, names) for every protein seen so far, seeded from the database
            var knownProteins = new Dictionary<string, ProtIdNames>();
            using (ISession session = OpenWriteSession())
            {
                foreach (DbProtein stored in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (!stored.Id.HasValue)
                        continue;
                    knownProteins.Add(stored.Sequence, new ProtIdNames(stored.Id.Value, stored.Names));
                }

                int addedCount = 0;
                using (var transaction = session.BeginTransaction())
                using (IDbCommand insertProtein = session.Connection.CreateCommand())
                using (IDbCommand insertName = session.Connection.CreateCommand())
                {
                    // Parse only for now; the delayed provider defers any web search
                    var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

                    // The trailing SELECT makes ExecuteScalar return the id of the new protein row
                    insertProtein.CommandText =
                        "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
                    insertProtein.Parameters.Add(new SQLiteParameter());

                    insertName.CommandText =
                        "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
                    // One positional parameter per "?" above, in column order:
                    // Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
                    const int nameParameterCount = 9;
                    for (int k = 0; k < nameParameterCount; k++)
                    {
                        insertName.Parameters.Add(new SQLiteParameter());
                    }

                    foreach (DbProtein imported in fastaImporter.Import(reader))
                    {
                        // +1 in the divisor avoids dividing by zero on an empty stream
                        int percent = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                        string status = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins,addedCount);
                        if (!progressMonitor.Invoke(status, percent))
                        {
                            return;
                        }

                        ProtIdNames idNames;
                        bool alreadyStored = knownProteins.TryGetValue(imported.Sequence, out idNames);
                        if (!alreadyStored)
                        {
                            // New sequence: insert it and remember its row id with no names yet
                            ((SQLiteParameter)insertProtein.Parameters[0]).Value = imported.Sequence;
                            long newId = Convert.ToInt64(insertProtein.ExecuteScalar());
                            idNames = new ProtIdNames(newId, new DbProteinName[0]);
                            knownProteins.Add(imported.Sequence, idNames);
                            addedCount++;
                        }

                        foreach (var candidate in imported.Names)
                        {
                            // Only insert names not already listed for this protein
                            bool nameExists = idNames.Names.Any(known => Equals(known.Name, candidate.Name));
                            if (!nameExists)
                            {
                                try
                                {
                                    var nameArgs = insertName.Parameters;
                                    ((SQLiteParameter)nameArgs[0]).Value = idNames.Id;
                                    // A name is only primary when its protein was newly added
                                    ((SQLiteParameter)nameArgs[1]).Value = candidate.IsPrimary && !alreadyStored;
                                    ((SQLiteParameter)nameArgs[2]).Value = candidate.Name;
                                    ((SQLiteParameter)nameArgs[3]).Value = candidate.Description;
                                    ((SQLiteParameter)nameArgs[4]).Value = candidate.PreferredName;
                                    ((SQLiteParameter)nameArgs[5]).Value = candidate.Accession;
                                    ((SQLiteParameter)nameArgs[6]).Value = candidate.Gene;
                                    ((SQLiteParameter)nameArgs[7]).Value = candidate.Species;
                                    ((SQLiteParameter)nameArgs[8]).Value = candidate.WebSearchStatus; // stored as a string for ease of serialization
                                    insertName.ExecuteNonQuery();
                                }
                                catch (Exception exception)
                                {
                                    // Best effort: report the failed name insert and carry on with the rest
                                    Console.Out.WriteLine(exception);
                                }
                            }
                        }
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return;
                    }
                    transaction.Commit();
                }

                AnalyzeDb(session);
                string finished = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, addedCount);
                progressMonitor.Invoke(finished, 100);
            }
        }