Example #1
0
 /// <summary>
 /// Creates a protein from its database row, copying the sequence and, when a
 /// primary name is supplied, the metadata of that name plus the metadata of
 /// every non-primary name attached to the same database protein.
 /// </summary>
 /// <param name="proteomeDb">passed through to the base constructor</param>
 /// <param name="protein">database row supplying the sequence</param>
 /// <param name="primaryName">primary name record; may be null, in which case no metadata is loaded</param>
 internal Protein(ProteomeDbPath proteomeDb, DbProtein protein, DbProteinName primaryName)
     : base(proteomeDb, protein)
 {
     Sequence = protein.Sequence;
     if (primaryName == null)
     {
         return;
     }

     _proteinMetadata = primaryName.GetProteinMetadata();
     if (primaryName.Protein == null)
     {
         return;
     }

     // Collect the alternative names up front so no later trip to the db is needed
     _alternativeNames = new List <ProteinMetadata>();
     foreach (var candidate in primaryName.Protein.Names)
     {
         if (candidate.IsPrimary)
         {
             continue;
         }
         _alternativeNames.Add(candidate.GetProteinMetadata());
     }
 }
Example #2
0
        /// <summary>
        /// Loads every protein sequence and id from the database, creates the
        /// ProteomeDbSubsequence table when it does not exist yet, and hands the
        /// sequences to DigestProteins. The transaction is committed only when the
        /// progress monitor has not been canceled.
        /// </summary>
        /// <param name="progressMonitor">consulted for cancellation after digestion</param>
        /// <param name="progressStatus">progress state forwarded by reference to DigestProteins</param>
        public void Digest(IProgressMonitor progressMonitor, ref IProgressStatus progressStatus)
        {
            try
            {
                using (var statelessSession = OpenStatelessSession(true))
                using (var transaction = statelessSession.BeginTransaction())
                {
                    // Every protein starts out with this same empty name array
                    var emptyNames = new DbProteinName[0];
                    var sequenceQuery = statelessSession.CreateQuery(
                        @"SELECT P.Sequence, P.Id FROM " + typeof(DbProtein) + @" P");
                    var sequenceToProtein = sequenceQuery
                        .List <object[]>()
                        .ToDictionary(row => (string)row[0], row => new ProtIdNames((long)row[1], emptyNames));

                    bool subsequencesTableExists = HasSubsequencesTable(() => statelessSession.Connection);
                    if (!subsequencesTableExists)
                    {
                        statelessSession.CreateSQLQuery(
                            @"CREATE TABLE ProteomeDbSubsequence (Sequence TEXT not null, ProteinIdBytes BLOB, primary key (Sequence));")
                        .ExecuteUpdate();
                    }

                    DigestProteins(statelessSession.Connection, sequenceToProtein, progressMonitor, ref progressStatus);

                    // On cancellation the transaction is disposed without a commit
                    if (!progressMonitor.IsCanceled)
                    {
                        transaction.Commit();
                    }
                }
            }
            catch (Exception)
            {
                // A cancellation takes priority over whatever was caught: surface it
                // through the cancellation token instead of the original exception
                CancellationToken.ThrowIfCancellationRequested();
                // Not cancelled, so let the original exception propagate unchanged
                throw;
            }
        }
Example #3
0
        /// <summary>
        /// Finds a protein name record for <paramref name="searchName"/>. A record whose
        /// Name equals the search text is preferred; failing that, the first record whose
        /// Accession, Gene or PreferredName equals the search text is used.
        /// </summary>
        /// <param name="session">session used to run the criteria queries</param>
        /// <param name="searchName">text to look for</param>
        /// <returns>the matching record, or null when no record matches</returns>
        private static DbProteinName GetProteinName(ISession session, string searchName)
        {
            // First attempt: exact match on the Name column
            var exactMatch = (DbProteinName)session.CreateCriteria(typeof(DbProteinName))
                             .Add(Restrictions.Eq(@"Name", searchName))
                             .UniqueResult();
            if (exactMatch != null)
            {
                return exactMatch;
            }

            // Fallback: any one of the secondary identifier columns may match
            var anyHintMatches = Restrictions.Disjunction();
            foreach (var column in new[] { @"Accession", @"Gene", @"PreferredName" })
            {
                anyHintMatches.Add(Restrictions.Eq(column, searchName));
            }

            var matches = new List <DbProteinName>();
            session.CreateCriteria(typeof(DbProteinName))
                .Add(anyHintMatches)
                .SetMaxResults(1)
                .List(matches);

            if (matches.Count > 0)
            {
                return matches[0];
            }
            return null;
        }
 /// <summary>
 /// Creates an entry holding a newly constructed <see cref="DbProteinName"/>,
 /// a sequence length of zero and the unsearched status.
 /// </summary>
 public ProteinSearchInfo()
     : this(new DbProteinName(), 0)
 {
 }

 /// <summary>
 /// Creates an entry for the given name record and sequence length,
 /// with the status set to unsearched.
 /// </summary>
 /// <param name="dbProteinDbInfo">name record stored in ProteinDbInfo</param>
 /// <param name="sequenceLength">value stored in SeqLength</param>
 public ProteinSearchInfo(DbProteinName dbProteinDbInfo, int sequenceLength)
 {
     this.ProteinDbInfo = dbProteinDbInfo;
     this.SeqLength = sequenceLength;
     this.Status = SearchStatus.unsearched;
 }
        /// <summary>
        /// Uses the known list of regexes to parse lineIn, keeping the
        /// result that fills in the most metadata.  In the event of a tie,
        /// first result wins - so regex list order matters.
        /// Populates the WebSearchInfo field but does not perform 
        /// the actual search - that's done elsewhere.
        /// </summary>
        /// <param name="lineIn">the text to be parsed</param>
        public ProteinMetadata ParseProteinMetaData(String lineIn)
        {
            // Nothing to parse.  A null line is treated the same as an empty one
            // instead of failing with a NullReferenceException.
            if (String.IsNullOrEmpty(lineIn))
                return null;

            var line = lineIn.Replace('\t', ' '); // regularize whitespace for simpler regexes

            // If there is a second >, then this is a custom name, and not
            // a real FASTA sequence.
            // NOTE(review): the second test does not require line[0] to be '>' as well,
            // so a '>' in the second position alone also skips one character - confirm intended.
            int start = (line.Length > 0 && line[0] == '>' ? 1 : 0);
            if (line.Length > 1 && line[1] == '>')
            {
                start++;
            }

            // Every regex sees the same text, so strip the leading marker(s) just once
            var header = line.Substring(start);

            ProteinMetadata bestResult = null;
            var bestCount = 0;
            foreach (var r in _regexFasta)
            {
                Match match = r.RegexPattern.Match(header);
                if (match.Success)
                {
                    // a hit - now use the replacement expression to get the ProteinMetadata parts
                    // (one "type:value" pair per line of the replacement output)
                    string[] regexOutputs = r.RegexPattern.Replace(header, r.RegexReplacement).Split('\n');
                    var headerResult = new DbProteinName();
                    string searchterm = null; // assume no webservice lookup unless told otherwise
                    int dbColumnsFound = 0;
                    // Walk the outputs last-to-first, so when a type appears more than once
                    // the earliest occurrence is the one that ends up in headerResult
                    for (var n = regexOutputs.Length; n-- > 0;)
                    {
                        var split = regexOutputs[n].Split(new[] {':'}, 2); // split on first colon only
                        if (split.Length == 2)
                        {
                            var type = split[0].Trim();
                            var val = split[1].Trim();
                            if (val.Contains("${")) // failed match // Not L10N
                            {
                                val = String.Empty;
                            }
                            if (val.Length > 0)
                            {
                                dbColumnsFound++; // valid entry
                                switch (type)
                                {
                                    case "name": // Not L10N
                                        headerResult.Name = val;
                                        break;
                                    case "description": // Not L10N
                                        headerResult.Description = val;
                                        break;
                                    case "accession": // Not L10N
                                        headerResult.Accession = val;
                                        break;
                                    case "preferredname": // Not L10N
                                        headerResult.PreferredName = val;
                                        break;
                                    case "gene": // Not L10N
                                        headerResult.Gene = val;
                                        break;
                                    case "species": // Not L10N
                                        headerResult.Species = val;
                                        break;
                                    case "searchterm": // Not L10N
                                        dbColumnsFound--; // not actually a db column
                                        searchterm = val;
                                        break;
                                    default:
                                        // Two-argument overload: the single-string constructor would
                                        // treat the text as the parameter name, not the message
                                        throw new ArgumentOutOfRangeException(
                                            "lineIn", // Not L10N
                                            String.Format("Unknown Fasta RegEx output formatter type \'{0}\'",    // Not L10N
                                                regexOutputs[n]));
                                }

                            }
                        }
                        else
                        {
                            // An output line without any colon means the replacement expression is malformed
                            throw new ArgumentOutOfRangeException(
                                "lineIn", // Not L10N
                                String.Format("Fasta RegEx failure in \'{0}\'",  // Not L10N
                                    header));
                        }
                    }
                    if (headerResult.GetProteinMetadata().HasMissingMetadata())
                    {
                        if (searchterm != null)
                        {
                            // shave off any alternatives (might look like "IPI:IPI00197700.1|SWISS-PROT:P04638|ENSEMBL:ENSRNOP00000004662|REFSEQ:NP_037244")
                            searchterm = searchterm.Split('|')[0];
                            // a reasonable accession value will have at least one digit in it, and won't have things like tabs and parens and braces that confuse web services
                            // (the digit test also guarantees searchterm is non-empty before it is indexed below)
                            if ("0123456789".Any(searchterm.Contains) && !" \t()[]".Any(searchterm.Contains))  // Not L10N
                                headerResult.SetWebSearchTerm(new WebSearchTerm(searchterm[0], searchterm.Substring(1))); // we'll need to hit the webservices to get this missing info
                        }
                    }
                    if (headerResult.GetProteinMetadata().WebSearchInfo.IsEmpty())
                        headerResult.SetWebSearchCompleted(); // no search possible
                    if (dbColumnsFound > bestCount)
                    {
                        bestCount = dbColumnsFound; // best match so far - tie goes to the first hit so order matters
                        bestResult = headerResult.GetProteinMetadata();
                    }
                }
            }
            // Still null when no regex matched, or no match produced a db column
            return bestResult;
        }
 /// <summary>
 /// Builds a <see cref="DbProtein"/> from one header line. The first character of the
 /// line is dropped and the remainder is split on character code 1; each non-empty
 /// piece is parsed with ParseProteinMetaData and added to the protein's names.
 /// </summary>
 /// <param name="line">header line, including its leading character</param>
 /// <returns>a new protein carrying one name per parsed header piece</returns>
 private DbProtein ParseProteinLine(String line)
 {
     String[] headers = line.Substring(1).Split('\u0001');
     var protein = new DbProtein();

     // The first piece is always added, and is created with a null protein argument.
     // NOTE(review): ParseProteinMetaData can return null (empty text or no regex hit);
     // confirm the DbProteinName constructor tolerates that.
     var firstName = new DbProteinName(null, ParseProteinMetaData(headers[0]));
     protein.Names.Add(firstName);

     // The remaining pieces are alternative names; empty pieces are skipped
     for (int index = 1; index < headers.Length; index++)
     {
         string header = headers[index];
         if (header.Length == 0)
         {
             continue;
         }
         var alternativeName = new DbProteinName(protein, ParseProteinMetaData(header));
         protein.Names.Add(alternativeName);
     }
     return protein;
 }