/// <summary>
/// Creates a Protein from its database row, copying the sequence and, when a
/// primary name is supplied, its metadata plus the metadata of every
/// non-primary name attached to the same database protein.
/// </summary>
/// <param name="proteomeDb">passed through to the base constructor</param>
/// <param name="protein">database row supplying the sequence</param>
/// <param name="primaryName">primary name record; may be null, in which case no metadata is set</param>
internal Protein(ProteomeDbPath proteomeDb, DbProtein protein, DbProteinName primaryName)
    : base(proteomeDb, protein)
{
    Sequence = protein.Sequence;
    if (primaryName == null)
    {
        return;
    }

    _proteinMetadata = primaryName.GetProteinMetadata();
    var owner = primaryName.Protein;
    if (owner == null)
    {
        return;
    }

    // Grab the alternative names now, rather than going back to the db later.
    var alternatives = new List<ProteinMetadata>();
    _alternativeNames = alternatives;
    foreach (var candidate in owner.Names)
    {
        if (candidate.IsPrimary)
        {
            continue;
        }
        alternatives.Add(candidate.GetProteinMetadata());
    }
}
/// <summary>
/// Loads every protein sequence and id, makes sure the ProteomeDbSubsequence
/// table exists, and runs DigestProteins over the loaded sequences inside a
/// single transaction.  The transaction is committed only when the progress
/// monitor has not been cancelled.
/// </summary>
/// <param name="progressMonitor">checked for cancellation after digestion and passed to DigestProteins</param>
/// <param name="progressStatus">progress status passed by reference to DigestProteins</param>
public void Digest(IProgressMonitor progressMonitor, ref IProgressStatus progressStatus)
{
    try
    {
        using (var session = OpenStatelessSession(true))
        using (var transaction = session.BeginTransaction())
        {
            var emptyNames = new DbProteinName[0];

            // Map each sequence to its protein id; every entry shares the same empty name array.
            var rows = session.CreateQuery(@"SELECT P.Sequence, P.Id FROM " + typeof(DbProtein) + @" P")
                .List<object[]>();
            var sequenceLookup = rows.ToDictionary(
                row => (string)row[0],
                row => new ProtIdNames((long)row[1], emptyNames));

            bool tableExists = HasSubsequencesTable(() => session.Connection);
            if (!tableExists)
            {
                session.CreateSQLQuery(
                        @"CREATE TABLE ProteomeDbSubsequence (Sequence TEXT not null, ProteinIdBytes BLOB, primary key (Sequence));")
                    .ExecuteUpdate();
            }

            DigestProteins(session.Connection, sequenceLookup, progressMonitor, ref progressStatus);

            // On cancellation the transaction is left uncommitted and simply disposed.
            if (!progressMonitor.IsCanceled)
            {
                transaction.Commit();
            }
        }
    }
    catch (Exception)
    {
        // If the operation was cancelled, then we want to throw OperationCancelledException instead of whatever we caught
        CancellationToken.ThrowIfCancellationRequested();
        // Otherwise, throw the original exception
        throw;
    }
}
/// <summary>
/// Looks up a protein name record.  An exact match on the Name column is tried
/// first; failing that, the first record whose Accession, Gene or PreferredName
/// equals the search text is returned.
/// </summary>
/// <param name="session">session used to run the criteria queries</param>
/// <param name="searchName">text to look for</param>
/// <returns>the matching record, or null when nothing matches</returns>
private static DbProteinName GetProteinName(ISession session, string searchName)
{
    var exactMatch = (DbProteinName) session.CreateCriteria(typeof(DbProteinName))
        .Add(Restrictions.Eq(@"Name", searchName))
        .UniqueResult();
    if (exactMatch != null)
    {
        return exactMatch;
    }

    // No hit on Name - accept a hit on any one of the secondary identifier columns.
    var anyHintColumn = Restrictions.Disjunction();
    foreach (var column in new[] { @"Accession", @"Gene", @"PreferredName" })
    {
        anyHintColumn.Add(Restrictions.Eq(column, searchName));
    }

    var matches = new List<DbProteinName>();
    session.CreateCriteria(typeof(DbProteinName))
        .Add(anyHintColumn)
        .SetMaxResults(1)
        .List(matches);

    return matches.Count > 0 ? matches[0] : null;
}
/// <summary>
/// Creates an unsearched entry backed by a new, empty DbProteinName and a
/// sequence length of zero.
/// </summary>
public ProteinSearchInfo()
    : this(new DbProteinName(), 0)
{
}
/// <summary>
/// Creates an unsearched entry for the given protein name record.
/// </summary>
/// <param name="dbProteinDbInfo">protein name record stored in ProteinDbInfo</param>
/// <param name="sequenceLength">value stored in SeqLength</param>
public ProteinSearchInfo(DbProteinName dbProteinDbInfo, int sequenceLength)
{
    // Every new entry starts out unsearched.
    Status = SearchStatus.unsearched;
    SeqLength = sequenceLength;
    ProteinDbInfo = dbProteinDbInfo;
}
/// <summary>
/// Uses the known list of regexes to parse lineIn, keeping the
/// result that fills in the most metadata.  In the event of a tie,
/// first result wins - so regex list order matters.
/// Populates the WebSearchInfo field but does not perform
/// the actual search - that's done elsewhere.
/// </summary>
/// <param name="lineIn">the text to be parsed</param>
/// <returns>
/// The metadata produced by the best-matching regex, or null when lineIn is
/// null or empty, or when no regex yields at least one database column.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// A regex replacement produced a line without a "type:value" shape, or with
/// an unrecognized type.
/// </exception>
public ProteinMetadata ParseProteinMetaData(String lineIn)
{
    // Null is treated like empty input instead of failing on lineIn.Length.
    if (String.IsNullOrEmpty(lineIn))
    {
        return null;
    }

    var line = lineIn.Replace('\t', ' '); // regularize whitespace for simpler regexes

    // If there is a second >, then this is a custom name, and not
    // a real FASTA sequence.
    // NOTE(review): the second check also fires when the first character is not '>' - confirm that is intended.
    int start = (line[0] == '>' ? 1 : 0);
    if (line.Length > 1 && line[1] == '>')
    {
        start++;
    }

    // The text handed to every regex is the same, so compute it once.
    var header = line.Substring(start);

    ProteinMetadata bestResult = null;
    var bestCount = 0;
    foreach (var r in _regexFasta)
    {
        Match match = r.RegexPattern.Match(header);
        if (!match.Success)
        {
            continue;
        }

        // a hit - now use the replacement expression to get the ProteinMetadata parts
        string[] regexOutputs = r.RegexPattern.Replace(header, r.RegexReplacement).Split('\n');
        var headerResult = new DbProteinName();
        string searchterm = null; // assume no webservice lookup unless told otherwise
        int dbColumnsFound = 0;
        for (var n = regexOutputs.Length; n-- > 0;)
        {
            var split = regexOutputs[n].Split(new[] {':'}, 2); // split on first colon only
            if (split.Length != 2)
            {
                // Two-argument constructor: the single-string overload would treat the text as a parameter name.
                throw new ArgumentOutOfRangeException("lineIn", // Not L10N
                    String.Format("Fasta RegEx failure in \'{0}\'", // Not L10N
                        header));
            }

            var type = split[0].Trim();
            var val = split[1].Trim();
            if (val.Contains("${")) // failed match // Not L10N
            {
                val = String.Empty;
            }
            if (val.Length == 0)
            {
                continue;
            }

            dbColumnsFound++; // valid entry
            switch (type)
            {
                case "name": // Not L10N
                    headerResult.Name = val;
                    break;
                case "description": // Not L10N
                    headerResult.Description = val;
                    break;
                case "accession": // Not L10N
                    headerResult.Accession = val;
                    break;
                case "preferredname": // Not L10N
                    headerResult.PreferredName = val;
                    break;
                case "gene": // Not L10N
                    headerResult.Gene = val;
                    break;
                case "species": // Not L10N
                    headerResult.Species = val;
                    break;
                case "searchterm": // Not L10N
                    dbColumnsFound--; // not actually a db column
                    searchterm = val;
                    break;
                default:
                    throw new ArgumentOutOfRangeException("lineIn", // Not L10N
                        String.Format("Unknown Fasta RegEx output formatter type \'{0}\'", // Not L10N
                            regexOutputs[n]));
            }
        }

        if (headerResult.GetProteinMetadata().HasMissingMetadata() && searchterm != null)
        {
            // shave off any alternatives (might look like "IPI:IPI00197700.1|SWISS-PROT:P04638|ENSEMBL:ENSRNOP00000004662|REFSEQ:NP_037244")
            searchterm = searchterm.Split('|')[0];
            // a reasonable accession value will have at least one digit in it, and won't have things like tabs and parens and braces that confuse web services
            if ("0123456789".Any(searchterm.Contains) && !" \t()[]".Any(searchterm.Contains)) // Not L10N
            {
                // we'll need to hit the webservices to get this missing info
                headerResult.SetWebSearchTerm(new WebSearchTerm(searchterm[0], searchterm.Substring(1)));
            }
        }
        if (headerResult.GetProteinMetadata().WebSearchInfo.IsEmpty())
        {
            headerResult.SetWebSearchCompleted(); // no search possible
        }

        if (dbColumnsFound > bestCount)
        {
            bestCount = dbColumnsFound; // best match so far - tie goes to the first hit so order matters
            bestResult = headerResult.GetProteinMetadata();
        }
    }
    return bestResult;
}
/// <summary>
/// Builds a DbProtein from one header line.  The first character is dropped and
/// the remainder is split on the character with code 1; the first piece becomes
/// the first name and each non-empty later piece becomes an additional name.
/// </summary>
/// <param name="line">header line; must contain at least one character</param>
/// <returns>a new DbProtein carrying one DbProteinName per parsed piece</returns>
private DbProtein ParseProteinLine(String line)
{
    String[] pieces = line.Substring(1).Split((char) 1);
    var result = new DbProtein();

    // The first name is created with a null protein reference; later ones point back at the protein.
    result.Names.Add(new DbProteinName(null, ParseProteinMetaData(pieces[0])));

    for (int index = 1; index < pieces.Length; index++)
    {
        var piece = pieces[index];
        if (piece.Length <= 0)
        {
            continue;
        }
        result.Names.Add(new DbProteinName(result, ParseProteinMetaData(piece)));
    }
    return result;
}