/// <summary>
/// Walks the groups of the current document and builds a new document from them.
/// Groups whose <c>PeptideGroup</c> is a <c>FastaSequence</c> are carried over unchanged.
/// Other groups lose every peptide that appears in one of the protein associations;
/// a group left with no peptides is dropped. Finally one new group is appended per
/// protein association, holding that association's peptides after they have been
/// passed through <c>ChangeFastaSequence</c>.
/// </summary>
/// <param name="current">The document to rebuild</param>
/// <param name="proteinAssociations">Pairs of a protein and the peptides matched to it</param>
/// <returns>The new document produced by <c>ChangeChildrenChecked</c></returns>
private SrmDocument CreateDocTree(SrmDocument current, List<KeyValuePair<FastaSequence, List<PeptideDocNode>>> proteinAssociations)
{
    // Every group that will be a child of the rebuilt document, in output order
    var resultGroups = new List<PeptideGroupDocNode>();

    // First pass: carry over the existing groups, trimming matched peptides out of non-protein groups
    foreach (var existingGroup in current.MoleculeGroups)
    {
        if (!(existingGroup.PeptideGroup is FastaSequence))
        {
            // Peptides that no association claims stay where they are
            var unmatched = existingGroup.Children
                .Cast<PeptideDocNode>()
                .Where(pep => !proteinAssociations.Any(pair => pair.Value.Contains(pep)))
                .ToArray();

            if (unmatched.Length != existingGroup.Children.Count)
            {
                // Something was removed: keep the trimmed group only if it still has peptides
                if (unmatched.Length > 0)
                {
                    resultGroups.Add((PeptideGroupDocNode) existingGroup.ChangeChildren(unmatched));
                }
                continue;
            }
        }

        // Either a pre-existing protein or a group that lost nothing: reuse the node as is
        resultGroups.Add(existingGroup);
    }

    // Second pass: one new protein group per association
    foreach (var association in proteinAssociations)
    {
        var fastaSequence = association.Key;
        var reassigned = association.Value.ConvertAll<PeptideDocNode>(
            matched => ChangeFastaSequence(current.Settings, matched, fastaSequence));
        resultGroups.Add(new PeptideGroupDocNode(fastaSequence, fastaSequence.Name, fastaSequence.Description, reassigned.ToArray()));
    }

    return (SrmDocument) current.ChangeChildrenChecked(resultGroups.ToArray());
}
/// <summary>
/// Rebuilds the document's child list: every child contained in
/// <c>PeptideGroupDocNodes</c> is replaced by the result of <c>ExcludePeptides</c>,
/// and all other children are kept as they are.
/// </summary>
/// <param name="srmDocument">The document whose children are processed</param>
/// <returns>The document returned by <c>ChangeChildrenChecked</c> for the rebuilt child list</returns>
private SrmDocument ExcludePeptidesFromDocument(SrmDocument srmDocument)
{
    List<DocNode> rebuiltChildren = new List<DocNode>();
    foreach (var child in srmDocument.Children)
    {
        if (PeptideGroupDocNodes.Contains(child))
        {
            // Tracked group: substitute whatever ExcludePeptides produces for it
            rebuiltChildren.Add(ExcludePeptides((PeptideGroupDocNode) child));
        }
        else
        {
            rebuiltChildren.Add(child);
        }
    }

    return (SrmDocument) srmDocument.ChangeChildrenChecked(rebuiltChildren);
}
/// <summary>
/// Prunes the document down to a set of nodes to preserve. The global indexes of
/// the nodes in <paramref name="preserveNodes"/> are collected and the actual pruning
/// is delegated to the overload that takes the index set. As documented for this
/// method, nodes are kept when they are listed or contain a listed node, and a
/// preserved node that contains no other preserved node keeps all of its children.
/// </summary>
/// <param name="document">The document to be modified</param>
/// <param name="preserveNodes">Nodes to preserve</param>
/// <returns>A new copy of the document with preserved children, or an empty
/// document, if nothing was preserved</returns>
public static SrmDocument RemoveAllBut(this SrmDocument document, IEnumerable<DocNode> preserveNodes)
{
    // Nodes are identified by the global index of their identity
    var indexesToKeep = new HashSet<int>();
    foreach (var keep in preserveNodes)
    {
        indexesToKeep.Add(keep.Id.GlobalIndex);
    }

    // A null result from the pruning overload means nothing was preserved;
    // in that case hand back the document with an empty child list.
    return (SrmDocument) (RemoveAllBut(document, indexesToKeep)
                          ?? document.ChangeChildrenChecked(new DocNode[0]));
}
/// <summary>
/// Resolves protein metadata for the peptide groups of <paramref name="docOrig"/> that still
/// need a search. The background proteome is consulted first (when one is configured and it
/// does not itself need a metadata search); anything still unresolved goes through
/// <c>FastaImporter.DoWebserviceLookup</c>. Results are recorded in <c>_processedNodes</c>,
/// keyed by node global index, and finally written back into a new document. The whole
/// operation runs while holding a lock on <c>_processedNodes</c>.
/// </summary>
/// <param name="docOrig">The document whose peptide groups are examined</param>
/// <param name="progressMonitor">Receives progress updates and is polled for cancellation</param>
/// <returns>A new document with the recorded metadata applied, or null if the operation was canceled</returns>
private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor progressMonitor)
{
    lock (_processedNodes)
    {
        // Check to make sure this operation was not canceled while this thread was
        // waiting to acquire the lock. This also cleans up pending work.
        if (progressMonitor.IsCanceled)
        {
            return null;
        }

        IProgressStatus progressStatus = new ProgressStatus(Resources.ProteinMetadataManager_LookupProteinMetadata_resolving_protein_details);
        int nResolved = 0;
        // Count only the groups that actually need a search. (Select(...).Count() would
        // count every group, regardless of what the predicate returns.)
        int nUnresolved = docOrig.PeptideGroups.Count(pg => pg.ProteinMetadata.NeedsSearch());

        if ((nUnresolved > 0) && !docOrig.Settings.PeptideSettings.BackgroundProteome.IsNone)
        {
            // Do a quick check to see if background proteome already has the info
            if (!docOrig.Settings.PeptideSettings.BackgroundProteome.NeedsProteinMetadataSearch)
            {
                try
                {
                    using (var proteomeDb = docOrig.Settings.PeptideSettings.BackgroundProteome.OpenProteomeDb())
                    {
                        foreach (PeptideGroupDocNode nodePepGroup in docOrig.PeptideGroups)
                        {
                            // Only nodes that need a search are part of nUnresolved, so only
                            // those may contribute to nResolved.
                            if (nodePepGroup.ProteinMetadata.NeedsSearch())
                            {
                                if (_processedNodes.ContainsKey(nodePepGroup.Id.GlobalIndex))
                                {
                                    // We did this before we were interrupted
                                    progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                }
                                else
                                {
                                    var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name);
                                    if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName))
                                    {
                                        proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit
                                    }
                                    if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession))
                                    {
                                        proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit
                                    }
                                    if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch())
                                    {
                                        // Background proteome has already resolved this
                                        _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata);
                                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                    }
                                }
                            }

                            if (progressMonitor.IsCanceled)
                            {
                                progressMonitor.UpdateProgress(progressStatus.Cancel());
                                return null;
                            }
                        }
                    }
                }
                // ReSharper disable once EmptyGeneralCatchClause
                catch
                {
                    // The protDB file is busy, or some other issue - just go directly to web
                }
            }
        }

        if (nResolved != nUnresolved)
        {
            try
            {
                // Now go to the web for more protein metadata (or pretend to, depending on WebEnabledFastaImporter.DefaultWebAccessMode)
                var docNodesWithUnresolvedProteinMetadata = new Dictionary<ProteinSearchInfo, PeptideGroupDocNode>();
                var proteinsToSearch = new List<ProteinSearchInfo>();
                foreach (PeptideGroupDocNode node in docOrig.PeptideGroups)
                {
                    if (node.ProteinMetadata.NeedsSearch() && !_processedNodes.ContainsKey(node.Id.GlobalIndex)) // Did we already process this?
                    {
                        var proteinMetadata = node.ProteinMetadata;
                        if (proteinMetadata.WebSearchInfo.IsEmpty()) // Never even been hit with regex
                        {
                            // Use Regexes to get some metadata, and a search term
                            var parsedProteinMetaData = FastaImporter.ParseProteinMetaData(proteinMetadata);
                            if ((parsedProteinMetaData == null) || Equals(parsedProteinMetaData.Merge(proteinMetadata), proteinMetadata.SetWebSearchCompleted()))
                            {
                                // That didn't parse well enough to make a search term, or didn't add any new info - just set it as searched so we don't keep trying
                                _processedNodes.Add(node.Id.GlobalIndex, proteinMetadata.SetWebSearchCompleted());
                                if (progressMonitor.IsCanceled)
                                {
                                    progressMonitor.UpdateProgress(progressStatus.Cancel());
                                    return null;
                                }
                                progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                proteinMetadata = null; // No search to be done
                            }
                            else
                            {
                                proteinMetadata = proteinMetadata.Merge(parsedProteinMetaData); // Fill in any gaps with parsed info
                            }
                        }

                        if (proteinMetadata != null)
                        {
                            // We note the sequence length because it's useful in disambiguating search results
                            proteinsToSearch.Add(new ProteinSearchInfo(new DbProteinName(null, proteinMetadata),
                                node.PeptideGroup.Sequence == null ? 0 : node.PeptideGroup.Sequence.Length));
                            docNodesWithUnresolvedProteinMetadata.Add(proteinsToSearch.Last(), node);
                        }
                    }
                }

                if (progressMonitor.IsCanceled)
                {
                    progressMonitor.UpdateProgress(progressStatus.Cancel());
                    return null;
                }
                progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved / nUnresolved));

                // Now we actually hit the internet
                if (proteinsToSearch.Any())
                {
                    foreach (var result in FastaImporter.DoWebserviceLookup(proteinsToSearch, progressMonitor, false)) // Resolve them all, now
                    {
                        Debug.Assert(!result.GetProteinMetadata().NeedsSearch());
                        _processedNodes.Add(docNodesWithUnresolvedProteinMetadata[result].Id.GlobalIndex, result.GetProteinMetadata());
                        if (progressMonitor.IsCanceled)
                        {
                            progressMonitor.UpdateProgress(progressStatus.Cancel());
                            return null;
                        }
                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                    }
                }
            }
            catch (OperationCanceledException)
            {
                progressMonitor.UpdateProgress(progressStatus.Cancel());
                return null;
            }
        }

        // And finally write back to the document
        var listProteins = new List<PeptideGroupDocNode>();
        foreach (PeptideGroupDocNode node in docOrig.MoleculeGroups)
        {
            if (_processedNodes.ContainsKey(node.Id.GlobalIndex))
            {
                listProteins.Add(node.ChangeProteinMetadata(_processedNodes[node.Id.GlobalIndex]));
            }
            else
            {
                listProteins.Add(node);
            }
        }
        var docNew = docOrig.ChangeChildrenChecked(listProteins.Cast<DocNode>().ToArray());
        progressMonitor.UpdateProgress(progressStatus.Complete());
        return (SrmDocument) docNew;
    }
}
/// <summary>
/// Produces a refined copy of the document. Peptide groups are filtered by the
/// accepted-protein list (when set), optionally switched to auto-managed children,
/// refined individually through the per-group <c>Refine</c> overload, and dropped when
/// they end up with fewer than <c>MinPeptidesPerProtein</c> children. If duplicate
/// removal is enabled and the per-group pass recorded repeated peptides, a second
/// pass strips those peptides from every remaining group.
/// </summary>
/// <param name="document">The document to refine</param>
/// <param name="progressMonitor">Optional monitor notified of each group processed; may be null</param>
/// <returns>The document with its children replaced by the refined groups</returns>
public SrmDocument Refine(SrmDocument document, SrmSettingsChangeMonitor progressMonitor)
{
    // Global indexes of peptides flagged as retention time regression outliers
    HashSet<int> outlierIds = new HashSet<int>();
    if (RTRegressionThreshold.HasValue)
    {
        // TODO: Move necessary code into Model.
        var regressionOutliers = RTLinearRegressionGraphPane.CalcOutliers(document,
            RTRegressionThreshold.Value, RTRegressionPrecision, UseBestResult);
        foreach (var outlier in regressionOutliers)
        {
            outlierIds.Add(outlier.Id.GlobalIndex);
        }
    }

    // Each tracking set exists only when the corresponding option is enabled
    HashSet<RefinementIdentity> includedPeptides = null;
    if (RemoveRepeatedPeptides)
    {
        includedPeptides = new HashSet<RefinementIdentity>();
    }
    HashSet<RefinementIdentity> repeatedPeptides = null;
    if (RemoveDuplicatePeptides)
    {
        repeatedPeptides = new HashSet<RefinementIdentity>();
    }

    // Accepted peptides map to their accepted charges; a null list means "any charge"
    Dictionary<RefinementIdentity, List<int>> acceptedPeptides = null;
    if (AcceptedPeptides != null)
    {
        acceptedPeptides = new Dictionary<RefinementIdentity, List<int>>();
        foreach (var accepted in AcceptedPeptides)
        {
            var identity = new RefinementIdentity(accepted.Sequence);
            List<int> knownCharges;
            if (acceptedPeptides.TryGetValue(identity, out knownCharges))
            {
                // Already accepted for every charge: nothing left to record
                if (knownCharges == null)
                {
                    continue;
                }

                if (accepted.Charge.HasValue)
                {
                    knownCharges.Add(accepted.Charge.Value);
                }
                else
                {
                    acceptedPeptides[identity] = null;
                }
            }
            else
            {
                acceptedPeptides.Add(identity,
                    accepted.Charge.HasValue ? new List<int> {accepted.Charge.Value} : null);
            }
        }
    }

    HashSet<string> acceptedProteins = null;
    if (AcceptedProteins != null)
    {
        acceptedProteins = new HashSet<string>(AcceptedProteins);
    }

    var refinedGroups = new List<PeptideGroupDocNode>();

    // Excluding proteins with too few peptides, since they can impact results
    // of the duplicate peptide check.
    int minPeptides = MinPeptidesPerProtein ?? 0;
    foreach (PeptideGroupDocNode sourceGroup in document.Children)
    {
        if (progressMonitor != null)
        {
            progressMonitor.ProcessGroup(sourceGroup);
        }

        if (acceptedProteins != null && !acceptedProteins.Contains(GetAcceptProteinKey(sourceGroup)))
        {
            continue;
        }

        PeptideGroupDocNode workingGroup = sourceGroup;
        // If auto-managing all peptides, make sure this flag is set correctly,
        // and update the peptides list, if necessary.
        if (AutoPickPeptidesAll && sourceGroup.AutoManageChildren == AutoPickChildrenOff)
        {
            workingGroup = (PeptideGroupDocNode) workingGroup.ChangeAutoManageChildren(!AutoPickChildrenOff);
            var settings = document.Settings;
            if (!AutoPickChildrenOff && !settings.PeptideSettings.Filter.AutoSelect)
            {
                settings = settings.ChangePeptideFilter(filter => filter.ChangeAutoSelect(true));
            }
            workingGroup = workingGroup.ChangeSettings(settings,
                new SrmSettingsDiff(true, false, false, false, false, false));
        }

        workingGroup = Refine(workingGroup, document, outlierIds,
            includedPeptides, repeatedPeptides, acceptedPeptides, progressMonitor);

        if (workingGroup.Children.Count >= minPeptides)
        {
            refinedGroups.Add(workingGroup);
        }
    }

    // Need a second pass, if all duplicate peptides should be removed,
    // and duplicates were found.
    if (repeatedPeptides != null && repeatedPeptides.Count > 0)
    {
        var dedupedGroups = new List<PeptideGroupDocNode>();
        foreach (PeptideGroupDocNode refinedGroup in refinedGroups)
        {
            var uniquePeptides = new List<PeptideDocNode>();
            foreach (PeptideDocNode peptideNode in refinedGroup.Children)
            {
                RefinementIdentity identity;
                if (peptideNode.Peptide.IsCustomIon)
                {
                    identity = new RefinementIdentity(peptideNode.Peptide.CustomIon);
                }
                else
                {
                    identity = new RefinementIdentity(document.Settings.GetModifiedSequence(peptideNode));
                }

                if (!repeatedPeptides.Contains(identity))
                {
                    uniquePeptides.Add(peptideNode);
                }
            }

            var dedupedGroup = (PeptideGroupDocNode) refinedGroup.ChangeChildrenChecked(uniquePeptides.ToArray(), true);
            if (dedupedGroup.Children.Count >= minPeptides)
            {
                dedupedGroups.Add(dedupedGroup);
            }
        }
        refinedGroups = dedupedGroups;
    }

    return (SrmDocument) document.ChangeChildrenChecked(refinedGroups.ToArray(), true);
}
/// <summary>
/// Resolves protein metadata for the peptide groups of <paramref name="docOrig"/> that still
/// need a search. The background proteome is consulted first (when one is configured and it
/// does not itself need a metadata search); anything still unresolved goes through
/// <c>FastaImporter.DoWebserviceLookup</c>. Results are recorded in <c>_processedNodes</c>,
/// keyed by node global index, and finally written back into a new document, leaving
/// apparently user-renamed nodes with their current name. The whole operation runs while
/// holding a lock on <c>_processedNodes</c>.
/// </summary>
/// <param name="docOrig">The document whose peptide groups are examined</param>
/// <param name="progressMonitor">Receives progress updates and is polled for cancellation</param>
/// <returns>A new document with the recorded metadata applied, or null if the operation was
/// canceled or <c>UpdatePrecentComplete</c> returned false</returns>
private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor progressMonitor)
{
    lock (_processedNodes)
    {
        // Check to make sure this operation was not canceled while this thread was
        // waiting to acquire the lock. This also cleans up pending work.
        if (progressMonitor.IsCanceled)
        {
            return null;
        }

        IProgressStatus progressStatus = new ProgressStatus(Resources.ProteinMetadataManager_LookupProteinMetadata_resolving_protein_details);
        int nResolved = 0;
        // Count only the groups that actually need a search. (Select(...).Count() would
        // count every group, regardless of what the predicate returns.)
        int nUnresolved = docOrig.PeptideGroups.Count(pg => pg.ProteinMetadata.NeedsSearch());

        if ((nUnresolved > 0) && !docOrig.Settings.PeptideSettings.BackgroundProteome.IsNone)
        {
            // Do a quick check to see if background proteome already has the info
            if (!docOrig.Settings.PeptideSettings.BackgroundProteome.NeedsProteinMetadataSearch)
            {
                try
                {
                    using (var proteomeDb = docOrig.Settings.PeptideSettings.BackgroundProteome.OpenProteomeDb())
                    {
                        foreach (PeptideGroupDocNode nodePepGroup in docOrig.PeptideGroups)
                        {
                            // Only nodes that need a search are part of nUnresolved, so only
                            // those may contribute to nResolved.
                            if (nodePepGroup.ProteinMetadata.NeedsSearch())
                            {
                                if (_processedNodes.ContainsKey(nodePepGroup.Id.GlobalIndex))
                                {
                                    // We did this before we were interrupted
                                    nResolved++;
                                }
                                else
                                {
                                    var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name);
                                    if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName))
                                    {
                                        proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit
                                    }
                                    if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession))
                                    {
                                        proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit
                                    }
                                    if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch())
                                    {
                                        // Background proteome has already resolved this
                                        _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata);
                                        nResolved++;
                                    }
                                }
                            }

                            // A false return stops processing (presumably on cancel - helper not visible here)
                            if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved / nUnresolved, ref progressStatus))
                            {
                                return null;
                            }
                        }
                    }
                }
                // ReSharper disable once EmptyGeneralCatchClause
                catch
                {
                    // The protDB file is busy, or some other issue - just go directly to web
                }
            }
        }

        if (nResolved != nUnresolved)
        {
            try
            {
                // Now go to the web for more protein metadata (or pretend to, depending on WebEnabledFastaImporter.DefaultWebAccessMode)
                var docNodesWithUnresolvedProteinMetadata = new Dictionary<ProteinSearchInfo, PeptideGroupDocNode>();
                var proteinsToSearch = new List<ProteinSearchInfo>();
                foreach (PeptideGroupDocNode node in docOrig.PeptideGroups)
                {
                    if (node.ProteinMetadata.NeedsSearch() && !_processedNodes.ContainsKey(node.Id.GlobalIndex)) // Did we already process this?
                    {
                        var proteinMetadata = node.ProteinMetadata;
                        if (proteinMetadata.WebSearchInfo.IsEmpty()) // Never even been hit with regex
                        {
                            // Use Regexes to get some metadata, and a search term
                            var parsedProteinMetaData = FastaImporter.ParseProteinMetaData(proteinMetadata);
                            if ((parsedProteinMetaData == null) || Equals(parsedProteinMetaData.Merge(proteinMetadata), proteinMetadata.SetWebSearchCompleted()))
                            {
                                // That didn't parse well enough to make a search term, or didn't add any new info - just set it as searched so we don't keep trying
                                _processedNodes.Add(node.Id.GlobalIndex, proteinMetadata.SetWebSearchCompleted());
                                if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved++ / nUnresolved, ref progressStatus))
                                {
                                    return null;
                                }
                                proteinMetadata = null; // No search to be done
                            }
                            else
                            {
                                proteinMetadata = proteinMetadata.Merge(parsedProteinMetaData); // Fill in any gaps with parsed info
                            }
                        }

                        if (proteinMetadata != null)
                        {
                            // We note the sequence length because it's useful in disambiguating search results
                            proteinsToSearch.Add(new ProteinSearchInfo(new DbProteinName(null, proteinMetadata),
                                node.PeptideGroup.Sequence == null ? 0 : node.PeptideGroup.Sequence.Length));
                            docNodesWithUnresolvedProteinMetadata.Add(proteinsToSearch.Last(), node);
                        }
                    }
                }

                if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved / nUnresolved, ref progressStatus))
                {
                    return null;
                }

                // Now we actually hit the internet
                if (proteinsToSearch.Any())
                {
                    foreach (var result in FastaImporter.DoWebserviceLookup(proteinsToSearch, progressMonitor, false)) // Resolve them all, now
                    {
                        Assume.IsTrue(!result.GetProteinMetadata().NeedsSearch());
                        _processedNodes.Add(docNodesWithUnresolvedProteinMetadata[result].Id.GlobalIndex, result.GetProteinMetadata());
                        if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved++ / nUnresolved, ref progressStatus))
                        {
                            return null;
                        }
                    }
                }
            }
            catch (OperationCanceledException)
            {
                progressMonitor.UpdateProgress(progressStatus.Cancel());
                return null;
            }
        }

        // And finally write back to the document
        var listProteins = new List<PeptideGroupDocNode>();
        foreach (PeptideGroupDocNode node in docOrig.MoleculeGroups)
        {
            if (_processedNodes.TryGetValue(node.Id.GlobalIndex, out var proteinMetadata))
            {
                // Compare existing and proposed metadata, ignoring name difference in case user changed
                // the name manually in the Targets tree while a background metadata lookup was going on, and
                // ignoring web search details since the existing node probably hasn't any yet.
                //
                // This fixes issue https://skyline.ms/announcements/home/support/thread.view?rowId=49107 in which:
                //    the user pasted a protein sequence into the Targets tree
                //    then tried to type in a name to replace the default assigned name "sequence1"
                //    after a few seconds the displayed name reverted to "sequence1" upon background protein metadata search completion
                // N.B. as this is timing dependent, and our automated tests are mandated to not require internet
                // access, writing a test for this fix (i.e. adding timings to the fake web lookup system) proved to
                // be tricky and finally deemed not worth the effort for this fairly obscure problem.
                if (!Equals(node.ProteinMetadata.Name, proteinMetadata.Name) && // Different name
                    Equals(node.ProteinMetadata.ChangeName(proteinMetadata.Name).ClearWebSearchInfo(), // But otherwise identical
                        proteinMetadata.ClearWebSearchInfo()))
                {
                    // Leave (apparently user-renamed) node alone, and note the web search that was actually used.
                    listProteins.Add(node.ChangeProteinMetadata(node.ProteinMetadata.ChangeWebSearchInfo(proteinMetadata.WebSearchInfo)));
                }
                else
                {
                    // Update the protein metadata for this node, if any
                    listProteins.Add(node.ChangeProteinMetadata(proteinMetadata));
                }
            }
            else
            {
                // Not yet processed
                listProteins.Add(node);
            }
        }
        var docNew = docOrig.ChangeChildrenChecked(listProteins.Cast<DocNode>().ToArray());
        progressMonitor.UpdateProgress(progressStatus.Complete());
        return (SrmDocument) docNew;
    }
}
/// <summary>
/// Enumerate all document peptides. If a library peptide already exists in the
/// current document, update the transition groups for that document peptide and
/// remove the peptide from the list to add.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="dictCopy">A dictionary of peptides to peptide matches. All added
/// peptides are removed</param>
/// <param name="toPath">Currently selected path.</param>
/// <param name="selectedPath">Selected path after the nodes have been added</param>
/// <returns>A new document with precursors for existing peptides added</returns>
private SrmDocument UpdateExistingPeptides(SrmDocument document,
    Dictionary<PeptideSequenceModKey, PeptideMatch> dictCopy, IdentityPath toPath, out IdentityPath selectedPath)
{
    selectedPath = toPath;
    IList<DocNode> nodePepGroups = new List<DocNode>();
    foreach (PeptideGroupDocNode nodePepGroup in document.MoleculeGroups)
    {
        IList<DocNode> nodePeps = new List<DocNode>();
        foreach (PeptideDocNode nodePep in nodePepGroup.Children)
        {
            var key = nodePep.SequenceKey;
            PeptideMatch peptideMatch;
            // If this peptide is not in our list of peptides to add,
            // or if we are in a peptide list and this peptide has been matched to protein(s),
            // then we don't touch this particular node.
            if (!dictCopy.TryGetValue(key, out peptideMatch) ||
                (nodePepGroup.IsPeptideList && (peptideMatch.Proteins != null && peptideMatch.Proteins.Any())))
            {
                nodePeps.Add(nodePep);
                continue;
            }

            var proteinName = nodePepGroup.PeptideGroup.Name;
            int indexProtein = -1;
            if (peptideMatch.Proteins != null)
            {
                indexProtein = peptideMatch.Proteins.IndexOf(protein => Equals(protein.ProteinMetadata.Name, proteinName));
                // If the user has opted to filter duplicate peptides, remove this peptide from the list to
                // add and continue.
                if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.NoDuplicates && peptideMatch.Proteins.Count > 1)
                {
                    dictCopy.Remove(key);
                    nodePeps.Add(nodePep);
                    continue;
                }
                // [1] If this protein is not the first match, and the user has opted to add only the first occurence,
                // [2] or if this protein is not one of the matches, and [2a] we are either not in a peptide list
                // [2b] or the user has opted to filter unmatched peptides, ignore this particular node.
                if ((indexProtein > 0 && FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.FirstOccurence) ||
                    (indexProtein == -1 && (!nodePepGroup.IsPeptideList || !Properties.Settings.Default.LibraryPeptidesAddUnmatched)))
                {
                    nodePeps.Add(nodePep);
                    continue;
                }
            }

            // Update the children of the peptide in the document to include the charge state of the peptide we are adding.
            PeptideDocNode nodePepMatch = peptideMatch.NodePep;
            PeptideDocNode nodePepSettings = null;
            var newChildren = nodePep.Children.ToList();
            Identity nodeGroupChargeId = newChildren.Count > 0 ? newChildren[0].Id : null;
            foreach (TransitionGroupDocNode nodeGroup in nodePepMatch.Children)
            {
                var chargeGroup = nodeGroup.TransitionGroup.PrecursorAdduct;
                if (nodePep.HasChildCharge(chargeGroup))
                {
                    SkippedPeptideCount++;
                }
                else
                {
                    // Apply the document settings to the matched peptide lazily, only once it is needed
                    if (nodePepSettings == null)
                    {
                        nodePepSettings = nodePepMatch.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
                    }
                    TransitionGroupDocNode nodeGroupCharge = (TransitionGroupDocNode) nodePepSettings.FindNode(nodeGroup.TransitionGroup);
                    if (nodeGroupCharge == null)
                    {
                        continue;
                    }
                    if (peptideMatch.Proteins != null && peptideMatch.Proteins.Count > 1)
                    {
                        // If we may be adding this specific node to the document more than once, create a copy of it so that
                        // we don't have two nodes with the same global id.
                        nodeGroupCharge = (TransitionGroupDocNode) nodeGroupCharge.CopyId();
                        nodeGroupCharge = (TransitionGroupDocNode) nodeGroupCharge.ChangeChildren(
                            nodeGroupCharge.Children.ToList().ConvertAll(child => child.CopyId()));
                    }
                    nodeGroupChargeId = nodeGroupCharge.Id;
                    newChildren.Add(nodeGroupCharge);
                }
            }
            // Sort the new peptide children.
            newChildren.Sort(Peptide.CompareGroups);
            var nodePepAdd = nodePep.ChangeChildrenChecked(newChildren);
            // If we have changed the children, need to set automanage children to false.
            if (nodePep.AutoManageChildren && !ReferenceEquals(nodePep, nodePepAdd))
            {
                nodePepAdd = nodePepAdd.ChangeAutoManageChildren(false);
            }
            // Change the selected path.
            if (PeptideMatches.Count == 1)
            {
                selectedPath = nodeGroupChargeId == null
                    ? new IdentityPath(new[] { nodePepGroup.Id, nodePepAdd.Id })
                    : new IdentityPath(new[] { nodePepGroup.Id, nodePepAdd.Id, nodeGroupChargeId });
            }
            nodePeps.Add(nodePepAdd);
            // Remove this peptide from the list of peptides we need to add to the document
            dictCopy.Remove(key);
            if (peptideMatch.Proteins != null)
            {
                if (indexProtein != -1)
                {
                    // Remove this protein from the list of proteins associated with the peptide.
                    peptideMatch.Proteins.RemoveAt(indexProtein);
                }
                // If this peptide has not yet been added to all matched proteins,
                // put it back in the list of peptides to add.
                if (peptideMatch.Proteins.Count != 0 && FilterMultipleProteinMatches != BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
                {
                    dictCopy.Add(key, peptideMatch);
                }
            }
        }
        nodePepGroups.Add(nodePepGroup.ChangeChildrenChecked(nodePeps));
    }
    return (SrmDocument) document.ChangeChildrenChecked(nodePepGroups);
}
/// <summary>
/// Resolves protein metadata for the peptide groups of <paramref name="docOrig"/> that still
/// need a search. The background proteome is consulted first (when one is configured and it
/// does not itself need a metadata search); anything still unresolved goes through
/// <c>FastaImporter.DoWebserviceLookup</c>. Results are recorded in <c>_processedNodes</c>,
/// keyed by node global index, and finally written back into a new document. The whole
/// operation runs while holding a lock on <c>_processedNodes</c>.
/// </summary>
/// <param name="docOrig">The document whose peptide groups are examined</param>
/// <param name="progressMonitor">Receives progress updates and is polled for cancellation</param>
/// <returns>A new document with the recorded metadata applied, or null if the operation was canceled</returns>
private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor progressMonitor)
{
    lock (_processedNodes)
    {
        // Check to make sure this operation was not canceled while this thread was
        // waiting to acquire the lock. This also cleans up pending work.
        if (progressMonitor.IsCanceled)
            return null;

        var progressStatus = new ProgressStatus(Resources.ProteinMetadataManager_LookupProteinMetadata_resolving_protein_details);
        int nResolved = 0;
        // Count only the groups that actually need a search. (Select(...).Count() would
        // count every group, regardless of what the predicate returns.)
        int nUnresolved = docOrig.PeptideGroups.Count(pg => pg.ProteinMetadata.NeedsSearch());

        if ((nUnresolved > 0) && !docOrig.Settings.PeptideSettings.BackgroundProteome.IsNone)
        {
            // Do a quick check to see if background proteome already has the info
            if (!docOrig.Settings.PeptideSettings.BackgroundProteome.NeedsProteinMetadataSearch)
            {
                try
                {
                    using (var proteomeDb = docOrig.Settings.PeptideSettings.BackgroundProteome.OpenProteomeDb())
                    {
                        foreach (PeptideGroupDocNode nodePepGroup in docOrig.PeptideGroups)
                        {
                            // Only nodes that need a search are part of nUnresolved, so only
                            // those may contribute to nResolved.
                            if (nodePepGroup.ProteinMetadata.NeedsSearch())
                            {
                                if (_processedNodes.ContainsKey(nodePepGroup.Id.GlobalIndex))
                                {
                                    // We did this before we were interrupted
                                    progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                }
                                else
                                {
                                    var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name);
                                    if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName))
                                        proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit
                                    if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession))
                                        proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit
                                    if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch())
                                    {
                                        // Background proteome has already resolved this
                                        _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata);
                                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                    }
                                }
                            }

                            if (progressMonitor.IsCanceled)
                            {
                                progressMonitor.UpdateProgress(progressStatus.Cancel());
                                return null;
                            }
                        }
                    }
                }
                // ReSharper disable once EmptyGeneralCatchClause
                catch
                {
                    // The protDB file is busy, or some other issue - just go directly to web
                }
            }
        }

        if (nResolved != nUnresolved)
        {
            try
            {
                // Now go to the web for more protein metadata (or pretend to, depending on WebEnabledFastaImporter.DefaultWebAccessMode)
                var docNodesWithUnresolvedProteinMetadata = new Dictionary<ProteinSearchInfo, PeptideGroupDocNode>();
                var proteinsToSearch = new List<ProteinSearchInfo>();
                foreach (PeptideGroupDocNode node in docOrig.PeptideGroups)
                {
                    if (node.ProteinMetadata.NeedsSearch() && !_processedNodes.ContainsKey(node.Id.GlobalIndex)) // Did we already process this?
                    {
                        var proteinMetadata = node.ProteinMetadata;
                        if (proteinMetadata.WebSearchInfo.IsEmpty()) // Never even been hit with regex
                        {
                            // Use Regexes to get some metadata, and a search term
                            var parsedProteinMetaData = FastaImporter.ParseProteinMetaData(proteinMetadata);
                            if ((parsedProteinMetaData == null) || Equals(parsedProteinMetaData.Merge(proteinMetadata), proteinMetadata.SetWebSearchCompleted()))
                            {
                                // That didn't parse well enough to make a search term, or didn't add any new info - just set it as searched so we don't keep trying
                                _processedNodes.Add(node.Id.GlobalIndex, proteinMetadata.SetWebSearchCompleted());
                                progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                proteinMetadata = null; // No search to be done
                            }
                            else
                            {
                                proteinMetadata = proteinMetadata.Merge(parsedProteinMetaData); // Fill in any gaps with parsed info
                            }
                        }

                        if (proteinMetadata != null)
                        {
                            // We note the sequence length because it's useful in disambiguating search results
                            proteinsToSearch.Add(new ProteinSearchInfo(new DbProteinName(null, proteinMetadata),
                                node.PeptideGroup.Sequence == null ? 0 : node.PeptideGroup.Sequence.Length));
                            docNodesWithUnresolvedProteinMetadata.Add(proteinsToSearch.Last(), node);
                        }
                    }
                }

                if (progressMonitor.IsCanceled)
                {
                    progressMonitor.UpdateProgress(progressStatus.Cancel());
                    return null;
                }
                progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved / nUnresolved));

                // Now we actually hit the internet
                if (proteinsToSearch.Any())
                {
                    foreach (var result in FastaImporter.DoWebserviceLookup(proteinsToSearch, progressMonitor, false)) // Resolve them all, now
                    {
                        Debug.Assert(!result.GetProteinMetadata().NeedsSearch());
                        _processedNodes.Add(docNodesWithUnresolvedProteinMetadata[result].Id.GlobalIndex, result.GetProteinMetadata());
                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                    }
                }
            }
            catch (OperationCanceledException)
            {
                progressMonitor.UpdateProgress(progressStatus.Cancel());
                return null;
            }
        }

        // And finally write back to the document
        var listProteins = new List<PeptideGroupDocNode>();
        foreach (PeptideGroupDocNode node in docOrig.MoleculeGroups)
        {
            if (_processedNodes.ContainsKey(node.Id.GlobalIndex))
            {
                listProteins.Add(node.ChangeProteinMetadata(_processedNodes[node.Id.GlobalIndex]));
            }
            else
            {
                listProteins.Add(node);
            }
        }
        var docNew = docOrig.ChangeChildrenChecked(listProteins.Cast<DocNode>().ToArray());
        progressMonitor.UpdateProgress(progressStatus.Complete());
        return (SrmDocument) docNew;
    }
}
/// <summary>
/// Enumerate all document peptides. If a library peptide already exists in the
/// current document, update the transition groups for that document peptide and
/// remove the peptide from the list to add.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="dictCopy">A dictionary of peptides to peptide matches. All added
/// peptides are removed</param>
/// <param name="toPath">Currently selected path.</param>
/// <param name="selectedPath">Selected path after the nodes have been added</param>
/// <returns>A new document with precursors for existing peptides added</returns>
private SrmDocument UpdateExistingPeptides(SrmDocument document,
    Dictionary<PeptideSequenceModKey, PeptideMatch> dictCopy, IdentityPath toPath, out IdentityPath selectedPath)
{
    selectedPath = toPath;
    IList<DocNode> nodePepGroups = new List<DocNode>();
    foreach (PeptideGroupDocNode nodePepGroup in document.PeptideGroups)
    {
        IList<DocNode> nodePeps = new List<DocNode>();
        foreach (PeptideDocNode nodePep in nodePepGroup.Children)
        {
            var key = nodePep.SequenceKey;
            PeptideMatch peptideMatch;
            // If this peptide is not in our list of peptides to add,
            // or if we are in a peptide list and this peptide has been matched to protein(s),
            // then we don't touch this particular node.
            if (!dictCopy.TryGetValue(key, out peptideMatch) ||
                (nodePepGroup.IsPeptideList && (peptideMatch.Proteins != null && peptideMatch.Proteins.Any())))
            {
                nodePeps.Add(nodePep);
            }
            else
            {
                var proteinName = nodePepGroup.PeptideGroup.Name;
                int indexProtein = -1;
                if (peptideMatch.Proteins != null)
                {
                    indexProtein = peptideMatch.Proteins.IndexOf(protein => Equals(protein.ProteinMetadata.Name, proteinName));
                    // If the user has opted to filter duplicate peptides, remove this peptide from the list to
                    // add and continue.
                    if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.NoDuplicates && peptideMatch.Proteins.Count > 1)
                    {
                        dictCopy.Remove(key);
                        nodePeps.Add(nodePep);
                        continue;
                    }
                    // [1] If this protein is not the first match, and the user has opted to add only the first occurence,
                    // [2] or if this protein is not one of the matches, and [2a] we are either not in a peptide list
                    // [2b] or the user has opted to filter unmatched peptides, ignore this particular node.
                    if ((indexProtein > 0 && FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.FirstOccurence) ||
                        (indexProtein == -1 && (!nodePepGroup.IsPeptideList || !Properties.Settings.Default.LibraryPeptidesAddUnmatched)))
                    {
                        nodePeps.Add(nodePep);
                        continue;
                    }
                }
                // Update the children of the peptide in the document to include the charge state of the peptide we are adding.
                PeptideDocNode nodePepMatch = peptideMatch.NodePep;
                PeptideDocNode nodePepSettings = null;
                var newChildren = nodePep.Children.ToList();
                Identity nodeGroupChargeId = newChildren.Count > 0 ? newChildren[0].Id : null;
                foreach (TransitionGroupDocNode nodeGroup in nodePepMatch.Children)
                {
                    int chargeGroup = nodeGroup.TransitionGroup.PrecursorCharge;
                    if (nodePep.HasChildCharge(chargeGroup))
                    {
                        SkippedPeptideCount++;
                    }
                    else
                    {
                        // Apply the document settings to the matched peptide lazily, only once it is needed
                        if (nodePepSettings == null)
                            nodePepSettings = nodePepMatch.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
                        TransitionGroupDocNode nodeGroupCharge = (TransitionGroupDocNode) nodePepSettings.FindNode(nodeGroup.TransitionGroup);
                        if (nodeGroupCharge == null)
                        {
                            continue;
                        }
                        // Count property rather than LINQ Count(), matching the other uses in this method
                        if (peptideMatch.Proteins != null && peptideMatch.Proteins.Count > 1)
                        {
                            // If we may be adding this specific node to the document more than once, create a copy of it so that
                            // we don't have two nodes with the same global id.
                            nodeGroupCharge = (TransitionGroupDocNode) nodeGroupCharge.CopyId();
                            nodeGroupCharge = (TransitionGroupDocNode) nodeGroupCharge.ChangeChildren(
                                nodeGroupCharge.Children.ToList().ConvertAll(child => child.CopyId()));
                        }
                        nodeGroupChargeId = nodeGroupCharge.Id;
                        newChildren.Add(nodeGroupCharge);
                    }
                }
                // Sort the new peptide children.
                newChildren.Sort(Peptide.CompareGroups);
                var nodePepAdd = nodePep.ChangeChildrenChecked(newChildren);
                // If we have changed the children, need to set automanage children to false.
                if (nodePep.AutoManageChildren && !ReferenceEquals(nodePep, nodePepAdd))
                    nodePepAdd = nodePepAdd.ChangeAutoManageChildren(false);
                // Change the selected path.
                if (PeptideMatches.Count == 1)
                {
                    selectedPath = nodeGroupChargeId == null
                        ? new IdentityPath(new[] { nodePepGroup.Id, nodePepAdd.Id })
                        : new IdentityPath(new[] { nodePepGroup.Id, nodePepAdd.Id, nodeGroupChargeId });
                }
                nodePeps.Add(nodePepAdd);
                // Remove this peptide from the list of peptides we need to add to the document
                dictCopy.Remove(key);
                if (peptideMatch.Proteins != null)
                {
                    if (indexProtein != -1)
                    {
                        // Remove this protein from the list of proteins associated with the peptide.
                        peptideMatch.Proteins.RemoveAt(indexProtein);
                    }
                    // If this peptide has not yet been added to all matched proteins,
                    // put it back in the list of peptides to add.
                    if (peptideMatch.Proteins.Count != 0 && FilterMultipleProteinMatches != BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
                        dictCopy.Add(key, peptideMatch);
                }
            }
        }
        nodePepGroups.Add(nodePepGroup.ChangeChildrenChecked(nodePeps));
    }
    return (SrmDocument) document.ChangeChildrenChecked(nodePepGroups);
}