示例#1
0
        /// <summary>
        /// Imports the valid FASTA sample text and verifies that exactly three proteins come out.
        /// </summary>
        public void TestImportValidFasta()
        {
            var fastaImporter = new WebEnabledFastaImporter();
            var fastaText     = new StringReader(STR_VALID_FASTA);

            // Materialize the importer's output so the entries can be counted
            var imported = new List <DbProtein>();
            foreach (var entry in fastaImporter.Import(fastaText))
            {
                imported.Add(entry);
            }

            Assert.AreEqual(3, imported.Count);
        }
        /// <summary>
        /// Imports the valid FASTA sample text and verifies that three proteins come out,
        /// each of whose first name entry carries a non-empty gene.
        /// </summary>
        public void TestImportValidFasta()
        {
            var fastaImporter = new WebEnabledFastaImporter();
            var fastaText     = new StringReader(STR_VALID_FASTA);

            var imported = new List <DbProtein>();
            imported.AddRange(fastaImporter.Import(fastaText));
            Assert.AreEqual(3, imported.Count);

            // Test with and without OX= syntax
            bool everyFirstNameHasGene = imported.TrueForAll(entry => !string.IsNullOrEmpty(entry.Names.ToArray()[0].Gene));
            Assert.IsTrue(everyFirstNameHasGene);
        }
示例#3
0
        /// <summary>
        /// Worker routine: reads every file in FastaFilePaths, collects the proteins keyed by sequence
        /// (merging the names of entries that share a sequence), passes the result to UpdatePeptides and
        /// then asks the form to close. Whatever happens, _running is cleared and threads waiting on
        /// this object's monitor are woken when the routine exits.
        /// </summary>
        private void DoWork()
        {
            try
            {
                // just do the basic name+description parsing, no regex or web access - we don't use extended metadata here
                var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.FakeWebSearchProvider());
                var proteins      = new Dictionary <string, ProteinData>();
                foreach (var path in FastaFilePaths)
                {
                    var statusText = "Reading FASTA file " + Path.GetFileName(path);
                    var fileInfo   = new FileInfo(path);
                    // Open the file exactly once: the importer must consume the same reader whose stream
                    // position is used for the progress percentage, otherwise the position never moves.
                    // The using block also releases the file handle on every exit path (including the
                    // early return below and any exception thrown by the importer).
                    using (var reader = File.OpenText(path))
                    {
                        foreach (var protein in fastaImporter.Import(reader))
                        {
                            // UpdateProgress returning false stops the whole operation (presumably the user cancelled)
                            if (!UpdateProgress(statusText, (int)(reader.BaseStream.Position * 100 / fileInfo.Length)))
                            {
                                return;
                            }
                            // One ProteinData per distinct sequence; later entries only contribute names
                            ProteinData proteinData;
                            if (!proteins.TryGetValue(protein.Sequence, out proteinData))
                            {
                                proteinData = new ProteinData(protein.Sequence);
                                proteins.Add(protein.Sequence, proteinData);
                            }
                            foreach (var name in protein.Names)
                            {
                                proteinData.AddName(name.Name, name.Description);
                            }
                        }
                    }
                }
                UpdatePeptides(proteins);

                if (!IsDisposed)
                {
                    // Close must be invoked through BeginInvoke rather than called directly from here
                    BeginInvoke(new Action(Close));
                }
            }
            finally
            {
                // Signal completion to anyone blocked in Monitor.Wait on this object
                lock (this)
                {
                    _running = false;
                    Monitor.PulseAll(this);
                }
            }
        }
        /// <summary>
        /// Imports the invalid FASTA sample text and verifies that the importer throws an
        /// InvalidDataException reporting the character '>' at line 16, after having already
        /// delivered exactly one protein.
        /// </summary>
        public void TestImportInvalidFasta()
        {
            var fastaImporter = new WebEnabledFastaImporter();
            var imported      = new List <DbProtein>();

            try
            {
                // Collect entries one at a time so that those delivered before the failure are retained
                foreach (var entry in fastaImporter.Import(new StringReader(STR_INVALID_FASTA)))
                {
                    imported.Add(entry);
                }
                Assert.Fail("Expected InvalidDataException to be thrown.");
            }
            catch (InvalidDataException caught)
            {
                string messageFormat = Resources.WebEnabledFastaImporter_ValidateProteinSequence_A_protein_sequence_cannot_contain_the_character___0___at_line__1_;
                string expected      = string.Format(messageFormat, '>', 16);
                Assert.AreEqual(expected, caught.Message);
            }

            Assert.AreEqual(1, imported.Count);
        }
 /// <summary>
 /// Checks that WebEnabledFastaImporter.IsValidProteinSequenceChar agrees with
 /// FastaSequence.ValidateSequence for each single-character sequence with code 1 through 256.
 /// </summary>
 public void TestIsValidProteinSequenceChar()
 {
     // Only a small range of character codes is checked: throwing an exception for
     // every invalid character in the full range takes too long.
     for (int code = 1; code <= 256; code++)
     {
         char candidate = (char)code;

         // ValidateSequence signals rejection by throwing; any exception counts as "invalid"
         bool acceptedByValidator = true;
         try
         {
             FastaSequence.ValidateSequence(candidate.ToString());
         }
         catch
         {
             acceptedByValidator = false;
         }

         Assert.AreEqual(acceptedByValidator, WebEnabledFastaImporter.IsValidProteinSequenceChar(candidate));
     }
 }
示例#6
0
        /// <summary>
        /// Exercises ProteomeDb against an old (schema version 0.0) protdb file: verifies that opening
        /// unsuitable files fails, resolves protein metadata, adds a FASTA file, and checks the schema
        /// version, protein counts and metadata after each step.
        /// </summary>
        /// <param name="testContext">test context passed to TestFilesDir together with ZIP_FILE</param>
        /// <param name="doActualWebAccess">if false, the importer is built with a FakeWebSearchProvider and the
        /// assertions marked as web-only are skipped; if true, a null provider is passed (presumably selecting
        /// real web access - confirm against WebEnabledFastaImporter) and those assertions are checked</param>
        public void DoTestOlderProteomeDb(TestContext testContext, bool doActualWebAccess)
        {
            using (var testFilesDir = new TestFilesDir(testContext, ZIP_FILE))
            {
                string fastaPath  = testFilesDir.GetTestPath("tiny.fasta");
                string protDbPath = testFilesDir.GetTestPath("celegans_mini.protdb"); // a version 0 protdb file
                string blibPath   = testFilesDir.GetTestPath("random.blib");          // a bibliospec file

                // What happens when you try to open a random file as a protdb file?
                AssertEx.ThrowsException <DbException>(() => ProteomeDb.OpenProteomeDb(fastaPath));

                // What happens when you try to open a non-protdb database file as a protdb file?
                AssertEx.ThrowsException <FileLoadException>(() => ProteomeDb.OpenProteomeDb(blibPath));

                using (ProteomeDb proteomeDb = ProteomeDb.OpenProteomeDb(protDbPath))
                {
                    Assert.IsTrue(proteomeDb.GetSchemaVersionMajor() == 0); // the initial db from our zipfile should be ancient
                    Assert.IsTrue(proteomeDb.GetSchemaVersionMinor() == 0); // the initial db from our zipfile should be ancient
                    Assert.AreEqual(9, proteomeDb.GetProteinCount());

                    var protein = proteomeDb.GetProteinByName("Y18D10A.20");
                    Assert.IsNotNull(protein);
                    Assert.IsTrue(String.IsNullOrEmpty(protein.Accession)); // old db won't have this populated

                    // The same importer instance is reused for both metadata lookups below
                    WebEnabledFastaImporter searcher = new WebEnabledFastaImporter(doActualWebAccess ? null :new WebEnabledFastaImporter.FakeWebSearchProvider());
                    bool            searchComplete;
                    IProgressStatus status = new ProgressStatus(string.Empty);
                    Assert.IsTrue(proteomeDb.LookupProteinMetadata(new SilentProgressMonitor(), ref status, searcher, false, out searchComplete)); // add any missing protein metadata
                    Assert.IsTrue(searchComplete);

                    protein = proteomeDb.GetProteinByName("Y18D10A.20");
                    Assert.IsNotNull(protein);
                    if (doActualWebAccess) // We can actually go to the web for metadata
                    {
                        Assert.AreEqual("Q9XW16", protein.Accession);
                    }

                    using (var reader = new StreamReader(fastaPath))
                    {
                        proteomeDb.AddFastaFile(reader, new SilentProgressMonitor(), ref status, false);
                    }
                    // the act of writing should update to the current version
                    Assert.AreEqual(ProteomeDb.SCHEMA_VERSION_MAJOR_CURRENT, proteomeDb.GetSchemaVersionMajor());
                    Assert.AreEqual(ProteomeDb.SCHEMA_VERSION_MINOR_CURRENT, proteomeDb.GetSchemaVersionMinor());
                    // 9 proteins before the import, 19 after it
                    Assert.AreEqual(19, proteomeDb.GetProteinCount());

                    // check for properly processed protein metadata
                    Assert.IsTrue(proteomeDb.LookupProteinMetadata(new SilentProgressMonitor(), ref status, searcher, false, out searchComplete));
                    Assert.IsTrue(searchComplete);
                    protein = proteomeDb.GetProteinByName("IPI00000044");
                    Assert.IsNotNull(protein);
                    Assert.AreEqual("P01127", protein.Accession); // We get this offline with our ipi->uniprot mapper
                    if (doActualWebAccess)
                    {
                        Assert.AreEqual("PDGFB_HUMAN", protein.PreferredName); // But this we get only with web access
                    }

/*
 *                  // TODO(bspratt): fix  "GetDigestion has no notion of a Db that has been added to, doesn't digest the new proteins and returns immediately (issue #304)"
 *                  Enzyme trypsin = EnzymeList.GetDefault();
 *                  proteomeDb.Digest(trypsin,  new SilentProgressMonitor());
 *                  Digestion digestion = proteomeDb.GetDigestion(trypsin.Name);
 *                  var digestedProteins0 = digestion.GetProteinsWithSequencePrefix("EDGWVK", 100);
 *                  Assert.IsTrue(digestedProteins0.Count >= 1);
 * */
                }
            }
        }
示例#7
0
        /// <summary>
        /// Access the web to resolve protein metadata not directly found in fasta file.
        /// The fasta text importer will have left search hints in ProteinMetadata.
        /// All reads and writes happen inside a single write session on the protdb file.
        /// </summary>
        /// <param name="progressMonitor">receives progress updates and is polled for cancellation</param>
        /// <param name="status">progress status passed by reference to UpdateProgressAndCheckForCancellation on every update</param>
        /// <param name="fastaImporter">object that accesses the web, or pretends to if in a test</param>
        /// <param name="parseOnly">if true, attempt to parse protein metadata from descriptions but do not proceed to web access</param>
        /// <param name="done">will return true if there is nothing more to look up; false if the method returns early</param>
        /// <returns>true on success, false if a progress update or the progress monitor reported cancellation</returns>
        public bool LookupProteinMetadata(IProgressMonitor progressMonitor, ref IProgressStatus status, WebEnabledFastaImporter fastaImporter, bool parseOnly, out bool done)
        {
            var unsearchedProteins = new List <ProteinSearchInfo>();

            done = false;
            // If we're here, it's because the background loader is done digesting and has moved on to protein metadata,
            // or because the PeptideSettingsUI thread needs to have protein metadata resolved for uniqueness purposes before
            // it can proceed.   Either way, we should be working on a temp copy and be the only one needing write access, so get a lock now
            using (ISession session = OpenWriteSession())       // We may update the protdb file with web search results
            {
                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
                {
                    return(false);
                }
                // get a list of proteins with unresolved metadata websearches
                var proteinNames     = session.CreateCriteria(typeof(DbProteinName)).List <DbProteinName>().Where(x => x.WebSearchInfo.NeedsSearch()).ToList();
                var proteinsToSearch =
                    proteinNames.Where(proteinName => (proteinName.GetProteinMetadata().GetPendingSearchTerm().Length > 0))
                    .ToList();
                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
                {
                    return(false);
                }
                // and a list of proteins which have never been considered for metadata search
                var untaggedProteins = proteinNames.Where(proteinName => proteinName.WebSearchInfo.IsEmpty()).ToList();

                // Try to fill in each untagged protein from its own description text; only those that
                // still need a search afterwards join proteinsToSearch
                foreach (var untaggedProtein in untaggedProteins)
                {
                    untaggedProtein.SetWebSearchCompleted(); // by default take this out of consideration for next time
                    var metadata = untaggedProtein.GetProteinMetadata();
                    if (metadata.HasMissingMetadata())
                    {
                        var search = fastaImporter.ParseProteinMetaData(metadata);
                        if (search != null)
                        {
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.Merge(search)); // don't stomp name by accident
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.ChangeWebSearchInfo(search.WebSearchInfo));
                        }
                    }
                    if (metadata.NeedsSearch())
                    {
                        proteinsToSearch.Add(untaggedProtein); // add to the list of things to commit back to the db
                    }
                }
                // Get the lengths of the sequences without getting the sequences themselves, for best speed
                var proteinIds     = proteinsToSearch.Select(name => name.Protein.Id.Value).Distinct().ToArray();
                var proteinLengths = new Dictionary <long, int>();
                using (var cmd = session.Connection.CreateCommand())
                {
                    string sql = @"SELECT Id, LENGTH(Sequence) AS SequenceLength FROM ProteomeDbProtein P";
                    // With fewer than 1000 ids the query is restricted to just those rows; otherwise the
                    // length of every protein in the table is fetched.
                    // NOTE(review): with zero ids this produces an empty "IN ()" list - believed to be
                    // accepted by SQLite, confirm if another database backend is ever used.
                    if (proteinIds.Length < 1000)
                    {
                        sql += @" WHERE P.Id IN (" +
                               string.Join(@",", proteinIds) + @")";
                    }
                    cmd.CommandText = sql;
                    using (var reader = cmd.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            var id  = reader.GetValue(0);
                            var len = reader.GetValue(1);
                            proteinLengths.Add(Convert.ToInt64(id), Convert.ToInt32(len));
                            if (proteinLengths.Count % 100 == 0)  // Periodic cancellation check
                            {
                                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
                                {
                                    return(false);
                                }
                            }
                        }
                    }
                }
                // Pair each protein name with its sequence length; length stays 0 when no row was found
                foreach (var p in proteinsToSearch)
                {
                    int length;
                    proteinLengths.TryGetValue(p.Protein.Id.GetValueOrDefault(), out length);
                    unsearchedProteins.Add(new ProteinSearchInfo(p, length));
                }

                if (untaggedProteins.Any(untagged => !untagged.GetProteinMetadata().NeedsSearch())) // did any get set as unsearchable?
                {
                    // Write back the ones that were formerly without search terms, but which now indicate no search is possible
                    using (var transaction = session.BeginTransaction())
                    {
                        foreach (var untagged in untaggedProteins.Where(untagged => !untagged.GetProteinMetadata().NeedsSearch()))
                        {
                            session.SaveOrUpdate(untagged); // update the metadata
                        }
                        transaction.Commit();
                    }
                }

                if (unsearchedProteins.Any() && !parseOnly)
                {
                    int resultsCount    = 0;
                    int unsearchedCount = unsearchedProteins.Count;
                    // Keep requesting batches for as long as the previous batch produced at least one result
                    for (bool success = true; success;)
                    {
                        success = false; // Until we see at least one succeed this round
                        var results = new List <DbProteinName>();
                        if (progressMonitor.IsCanceled)
                        {
                            return(false);
                        }

                        // The "true" arg means "do just one batch then return"
                        foreach (var result in fastaImporter.DoWebserviceLookup(unsearchedProteins, progressMonitor, true))
                        {
                            if (result != null)
                            {
                                string message = string.Format(Resources.ProteomeDb_LookupProteinMetadata_Retrieving_details_for__0__proteins,
                                                               unsearchedProteins.Count);
                                // Make it clearer when web access is faked during testing
                                if (fastaImporter.IsAccessFaked)
                                {
                                    message = @"FAKED: " + message;
                                }
                                // Percent complete is measured against the number of proteins at the start of the search
                                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, message, 100 * resultsCount++ / unsearchedCount))
                                {
                                    return(false);
                                }
                                success = true;
                                results.Add(result.ProteinDbInfo);
                            }
                        }
                        if (results.Any()) // save this batch
                        {
                            using (var transaction = session.BeginTransaction())
                            {
                                foreach (var result in results)
                                {
                                    session.SaveOrUpdate(result);
                                }
                                transaction.Commit();
                            }
                        }
                        // Edit this list rather than rederive with database access
                        var hits = unsearchedProteins.Where(p => !p.GetProteinMetadata().NeedsSearch()).ToList();
                        foreach (var hit in hits)
                        {
                            unsearchedProteins.Remove(hit);
                        }
                    }
                }
                // Anything still in the list could not be resolved in this call
                done = !unsearchedProteins.Any();
            } // End writesession
            return(true);
        }
示例#8
0
        /// <summary>
        /// Imports the proteins from a FASTA stream into the database. Sequences not yet known are inserted
        /// as new proteins; for every imported protein, names not already recorded for its sequence are
        /// inserted as well. Metadata is only parsed from the FASTA text here (a DelayedWebSearchProvider
        /// is used), no web search is performed.
        /// </summary>
        /// <param name="reader">FASTA text; the position and length of its BaseStream are also used to estimate progress</param>
        /// <param name="progressMonitor">receives progress updates and is polled for cancellation</param>
        /// <param name="status">progress status passed by reference to UpdateProgressAndCheckForCancellation on every update</param>
        /// <param name="delayAnalyzeDb">if false, AnalyzeDb is called on the session's connection once the import has finished</param>
        /// <param name="duplicateSequenceCount">number of imported proteins whose sequence was already known, either
        /// from the database or from earlier in the same import</param>
        public void AddFastaFile(StreamReader reader, IProgressMonitor progressMonitor, ref IProgressStatus status,
                                 bool delayAnalyzeDb, out int duplicateSequenceCount)
        {
            Dictionary <string, ProtIdNames> proteinIds = new Dictionary <string, ProtIdNames>();

            using (IStatelessSession session = SessionFactory.OpenStatelessSession()) // This is a long session, but there's no harm since db is useless till its done
            {
                // NOTE(review): this lookup is keyed by the name row's own Id, yet it is indexed by protein.Id
                // below - looks like the key was meant to be the owning protein's id; confirm against the
                // DbProteinName mapping before changing.
                var proteinNames = session.CreateCriteria <DbProteinName>().List <DbProteinName>().ToLookup(name => name.Id.Value);
                // Index the proteins already in the database by sequence
                foreach (DbProtein protein in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (protein.Id.HasValue)
                    {
                        proteinIds.Add(protein.Sequence, new ProtIdNames(protein.Id.Value, proteinNames[protein.Id.Value].ToArray()));
                    }
                }
                int proteinCount = 0;
                duplicateSequenceCount = 0;
                // NOTE(review): the early returns inside this block leave without calling Commit; the inserts
                // made so far are presumably rolled back when the transaction is disposed - confirm.
                using (var transaction = session.BeginTransaction())
                    using (IDbCommand insertProtein = session.Connection.CreateCommand())
                        using (IDbCommand insertName = session.Connection.CreateCommand())
                        {
                            WebEnabledFastaImporter fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider()); // just parse, no search for now
                            // The trailing select makes ExecuteScalar return the row id of the inserted protein
                            insertProtein.CommandText =
                                @"INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();";
                            insertProtein.Parameters.Add(new SQLiteParameter());
                            insertName.CommandText =
                                @"INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)";
                            insertName.Parameters.Add(new SQLiteParameter()); // Id
                            insertName.Parameters.Add(new SQLiteParameter()); // IsPrimary
                            insertName.Parameters.Add(new SQLiteParameter()); // Name
                            insertName.Parameters.Add(new SQLiteParameter()); // Description
                            insertName.Parameters.Add(new SQLiteParameter()); // PreferredName
                            insertName.Parameters.Add(new SQLiteParameter()); // Accession
                            insertName.Parameters.Add(new SQLiteParameter()); // Gene
                            insertName.Parameters.Add(new SQLiteParameter()); // Species
                            insertName.Parameters.Add(new SQLiteParameter()); // WebSearchInfo


                            foreach (DbProtein protein in fastaImporter.Import(reader))
                            {
                                // The +1 in the divisor avoids dividing by zero for an empty stream
                                int iProgress = (int)(reader.BaseStream.Position * 100 / (reader.BaseStream.Length + 1));
                                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, proteinCount), iProgress))
                                {
                                    return;
                                }
                                bool        existingProtein = false;
                                ProtIdNames proteinIdNames;
                                if (proteinIds.TryGetValue(protein.Sequence, out proteinIdNames))
                                {
                                    existingProtein = true;
                                    duplicateSequenceCount++;
                                }
                                else
                                {
                                    // New sequence: insert it and remember its row id, starting with no known names
                                    ((SQLiteParameter)insertProtein.Parameters[0]).Value = protein.Sequence;
                                    proteinIdNames = new ProtIdNames(Convert.ToInt64(insertProtein.ExecuteScalar()), new DbProteinName[0]);
                                    proteinIds.Add(protein.Sequence, proteinIdNames);
                                    proteinCount++;
                                }
                                foreach (var proteinName in protein.Names)
                                {
                                    // Skip any names that already exist
                                    if (proteinIdNames.Names.Any(dbProteinName => Equals(dbProteinName.Name, proteinName.Name)))
                                    {
                                        continue;
                                    }

                                    try
                                    {
                                        ((SQLiteParameter)insertName.Parameters[0]).Value = proteinIdNames.Id;
                                        // A name added to an already known sequence is never stored as primary
                                        ((SQLiteParameter)insertName.Parameters[1]).Value = proteinName.IsPrimary && !existingProtein;
                                        ((SQLiteParameter)insertName.Parameters[2]).Value = proteinName.Name;
                                        ((SQLiteParameter)insertName.Parameters[3]).Value = proteinName.Description;
                                        ((SQLiteParameter)insertName.Parameters[4]).Value = proteinName.PreferredName;
                                        ((SQLiteParameter)insertName.Parameters[5]).Value = proteinName.Accession;
                                        ((SQLiteParameter)insertName.Parameters[6]).Value = proteinName.Gene;
                                        ((SQLiteParameter)insertName.Parameters[7]).Value = proteinName.Species;
                                        ((SQLiteParameter)insertName.Parameters[8]).Value = proteinName.WebSearchStatus; // represent as a string for ease of serialization
                                        insertName.ExecuteNonQuery();
                                    }
                                    catch (Exception exception)
                                    {
                                        // NOTE(review): a failed name insert is only written to the console and the
                                        // import continues - looks like deliberate best-effort; confirm.
                                        Console.Out.WriteLine(exception);
                                    }
                                }
                            }
                            if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                            {
                                return;
                            }

                            // Digest only when the database has a subsequences table
                            if (HasSubsequencesTable(() => session.Connection))
                            {
                                DigestProteins(session.Connection, proteinIds, progressMonitor, ref status);
                            }
                            if (progressMonitor.IsCanceled)
                            {
                                return;
                            }
                            transaction.Commit();
                        }
                if (!delayAnalyzeDb)
                {
                    AnalyzeDb(session.Connection); // NB This runs asynchronously and may interfere with further writes
                }
                // Final update; its cancellation result is ignored because the work is already committed
                UpdateProgressAndCheckForCancellation(progressMonitor, ref status,
                                                      string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, proteinCount), 100);
            }
        }
示例#9
0
        /// <summary>
        /// Access the web to resolve protein metadata not directly found in fasta file.
        /// The fasta text importer will have left search hints in ProteinMetadata.
        /// The candidates are gathered in one session; results are written back in separate write sessions.
        /// </summary>
        /// <param name="progressMonitor">callback given a message and a percent complete; a false return value aborts the lookup</param>
        /// <param name="fastaImporter">object that accesses the web, or pretends to if in a test</param>
        /// <param name="polite">if true, don't try to resolve everything in one go, assume we can come back later.
        /// NOTE(review): this parameter is not referenced anywhere in the method body.</param>
        /// <returns>true on success, false if progressMonitor returned false</returns>
        public bool LookupProteinMetadata(ProgressMonitor progressMonitor, WebEnabledFastaImporter fastaImporter, bool polite = false)
        {
            var unsearchedProteins = new List <ProteinSearchInfo>();
            List <DbProteinName> untaggedProteins;

            using (ISession session = OpenSession())
            {
                if (!progressMonitor.Invoke(Resources.ProteomeDb_LookupProteinMetadata_looking_for_unresolved_protein_details, 0))
                {
                    return(false);
                }

                // get a list of proteins with unresolved metadata websearches
                var proteinNames     = session.CreateCriteria(typeof(DbProteinName)).List <DbProteinName>();
                var proteinsToSearch =
                    proteinNames.Where(proteinName => (proteinName.GetProteinMetadata().GetPendingSearchTerm().Length > 0))
                    .ToList();
                // and a list of proteins which have never been considered for metadata search
                untaggedProteins =
                    proteinNames.Where(proteinName => proteinName.WebSearchInfo.IsEmpty()).ToList();

                // Try to fill in each untagged protein from its own description text; only those that
                // still need a search afterwards join proteinsToSearch
                foreach (var untaggedProtein in untaggedProteins)
                {
                    untaggedProtein.SetWebSearchCompleted(); // by default take this out of consideration for next time
                    var metadata = untaggedProtein.GetProteinMetadata();
                    if (metadata.HasMissingMetadata())
                    {
                        var search = fastaImporter.ParseProteinMetaData(metadata);
                        if (search != null)
                        {
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.Merge(search)); // don't stomp name by accident
                            metadata = untaggedProtein.ChangeProteinMetadata(metadata.ChangeWebSearchInfo(search.WebSearchInfo));
                        }
                    }
                    if (metadata.NeedsSearch())
                    {
                        proteinsToSearch.Add(untaggedProtein); // add to the list of things to commit back to the db
                    }
                }
                // Get the lengths of the sequences without getting the sequences themselves, for best speed
                var proteinIds     = proteinsToSearch.Select(name => name.Protein.Id.Value).Distinct().ToArray();
                var proteinLengths = new Dictionary <long, int>();
                using (var cmd = session.Connection.CreateCommand())
                {
                    string sql = "SELECT Id, LENGTH(Sequence) AS SequenceLength FROM ProteomeDbProtein P"; // Not L10N
                    // With fewer than 1000 ids the query is restricted to just those rows; otherwise the
                    // length of every protein in the table is fetched
                    if (proteinIds.Length < 1000)
                    {
                        sql += " WHERE P.Id IN (" +                // Not L10N
                               string.Join(",", proteinIds) + ")"; // Not L10N
                    }
                    cmd.CommandText = sql;
                    using (var reader = cmd.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            var id  = reader.GetValue(0);
                            var len = reader.GetValue(1);
                            proteinLengths.Add(Convert.ToInt64(id), Convert.ToInt32(len));
                        }
                    }
                }
                // Pair each protein name with its sequence length; length stays 0 when no row was found
                foreach (var p in proteinsToSearch)
                {
                    int length;
                    proteinLengths.TryGetValue(p.Protein.Id.GetValueOrDefault(), out length);
                    unsearchedProteins.Add(new ProteinSearchInfo(p, length));
                }
            }

            if (untaggedProteins.Any(untagged => !untagged.GetProteinMetadata().NeedsSearch())) // did any get set as unsearchable?
            {
                // Write back the ones that were formerly without search terms, but which now indicate no search is possible
                using (ISession session = OpenWriteSession())
                {
                    using (var transaction = session.BeginTransaction())
                    {
                        foreach (var untagged in untaggedProteins.Where(untagged => !untagged.GetProteinMetadata().NeedsSearch()))
                        {
                            session.SaveOrUpdate(untagged); // update the metadata
                        }
                        transaction.Commit();
                    }
                }
            }

            if (unsearchedProteins.Any())
            {
                int resultsCount    = 0;
                int unsearchedCount = unsearchedProteins.Count;
                // Keep requesting batches for as long as the previous batch produced at least one result
                for (bool success = true; success;)
                {
                    success = false; // Until we see at least one succeed this round
                    var results = new List <DbProteinName>();

                    // The "true" arg means "do just one batch then return"
                    // (null is passed for the lookup's own progress monitor argument; progress is reported below instead)
                    foreach (var result in fastaImporter.DoWebserviceLookup(unsearchedProteins, null, true))
                    {
                        if (result != null)
                        {
                            // Percent complete is measured against the number of proteins at the start of the search
                            if (
                                !progressMonitor.Invoke(
                                    string.Format(
                                        Resources.ProteomeDb_LookupProteinMetadata_Retrieving_details_for__0__proteins,
                                        unsearchedProteins.Count), 100 * resultsCount++ / unsearchedCount))
                            {
                                return(false);
                            }
                            success = true;
                            results.Add(result.ProteinDbInfo);
                        }
                    }
                    if (results.Any()) // save this batch
                    {
                        using (var session = OpenWriteSession())
                        {
                            using (var transaction = session.BeginTransaction())
                            {
                                foreach (var result in results)
                                {
                                    session.SaveOrUpdate(result);
                                }
                                transaction.Commit();
                                // NOTE(review): explicit Close while still inside the using blocks that will
                                // dispose the session anyway - looks redundant; confirm before removing.
                                session.Close();
                            }
                        }
                    }
                    // Edit this list rather than rederive with database access
                    var hits = unsearchedProteins.Where(p => !p.GetProteinMetadata().NeedsSearch()).ToList();
                    foreach (var hit in hits)
                    {
                        unsearchedProteins.Remove(hit);
                    }
                }
            }
            return(true);
        }
示例#10
0
        /// <summary>
        /// Parses a FASTA stream and inserts any protein sequences and protein names
        /// that are not already present in the database.
        /// </summary>
        /// <param name="reader">Source of the FASTA text. The position of its underlying
        /// stream relative to the stream length is used to report progress.</param>
        /// <param name="progressMonitor">Called with a status message and a percentage.
        /// When it returns false the method returns immediately, before the transaction
        /// is committed.</param>
        public void AddFastaFile(StreamReader reader, ProgressMonitor progressMonitor)
        {
            // Proteins seen so far (already in the database or added by this call), keyed by sequence
            var knownProteins = new Dictionary<string, ProtIdNames>();

            using (ISession session = OpenWriteSession())
            {
                foreach (DbProtein dbProtein in session.CreateCriteria(typeof(DbProtein)).List())
                {
                    if (!dbProtein.Id.HasValue)
                    {
                        continue;
                    }
                    knownProteins.Add(dbProtein.Sequence, new ProtIdNames(dbProtein.Id.Value, dbProtein.Names));
                }

                int proteinCount = 0;
                using (var transaction = session.BeginTransaction())
                using (IDbCommand insertProtein = session.Connection.CreateCommand())
                using (IDbCommand insertName = session.Connection.CreateCommand())
                {
                    // just parse, no search for now
                    var fastaImporter = new WebEnabledFastaImporter(new WebEnabledFastaImporter.DelayedWebSearchProvider());

                    insertProtein.CommandText =
                        "INSERT INTO ProteomeDbProtein (Version, Sequence) Values (1,?);select last_insert_rowid();"; // Not L10N
                    var sequenceParam = new SQLiteParameter();
                    insertProtein.Parameters.Add(sequenceParam);

                    insertName.CommandText =
                        "INSERT INTO ProteomeDbProteinName (Version, Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus) Values(1,?,?,?,?,?,?,?,?,?)"; // Not L10N
                    // One positional parameter per "?" in the statement above, in column order:
                    // Protein, IsPrimary, Name, Description, PreferredName, Accession, Gene, Species, WebSearchStatus
                    var nameParams = new SQLiteParameter[9];
                    for (int i = 0; i < nameParams.Length; i++)
                    {
                        nameParams[i] = new SQLiteParameter();
                        insertName.Parameters.Add(nameParams[i]);
                    }

                    foreach (DbProtein fastaProtein in fastaImporter.Import(reader))
                    {
                        // The +1 on the length keeps the division safe for an empty stream
                        var stream = reader.BaseStream;
                        int percentRead = (int)(stream.Position * 100 / (stream.Length + 1));
                        string status = string.Format(Resources.ProteomeDb_AddFastaFile_Added__0__proteins, proteinCount);
                        if (!progressMonitor.Invoke(status, percentRead))
                        {
                            return;
                        }

                        ProtIdNames idNames;
                        bool alreadyKnown = knownProteins.TryGetValue(fastaProtein.Sequence, out idNames);
                        if (!alreadyKnown)
                        {
                            // New sequence: insert it and remember the row id returned by the statement
                            sequenceParam.Value = fastaProtein.Sequence;
                            long newId = Convert.ToInt64(insertProtein.ExecuteScalar());
                            idNames = new ProtIdNames(newId, new DbProteinName[0]);
                            knownProteins.Add(fastaProtein.Sequence, idNames);
                            proteinCount++;
                        }

                        foreach (var fastaName in fastaProtein.Names)
                        {
                            // Only insert names not already recorded for this protein
                            bool nameExists = idNames.Names.Any(known => Equals(known.Name, fastaName.Name));
                            if (!nameExists)
                            {
                                try
                                {
                                    nameParams[0].Value = idNames.Id;
                                    // A name is stored as primary only when the sequence was not known before this entry
                                    nameParams[1].Value = fastaName.IsPrimary && !alreadyKnown;
                                    nameParams[2].Value = fastaName.Name;
                                    nameParams[3].Value = fastaName.Description;
                                    nameParams[4].Value = fastaName.PreferredName;
                                    nameParams[5].Value = fastaName.Accession;
                                    nameParams[6].Value = fastaName.Gene;
                                    nameParams[7].Value = fastaName.Species;
                                    nameParams[8].Value = fastaName.WebSearchStatus; // represent as a string for ease of serialization
                                    insertName.ExecuteNonQuery();
                                }
                                catch (Exception insertError)
                                {
                                    // A failed name insert is reported on the console and the import continues
                                    Console.Out.WriteLine(insertError);
                                }
                            }
                        }
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return;
                    }
                    transaction.Commit();
                }

                AnalyzeDb(session);
                string finished = string.Format(Resources.ProteomeDb_AddFastaFile_Finished_importing__0__proteins, proteinCount);
                progressMonitor.Invoke(finished, 100);
            }
        }