Esempio n. 1
0
        // Builds a new document from the current SrmDocument and a list of protein associations.
        // The result contains, in order:
        //   1. every existing group, except that groups which are not FastaSequence proteins lose
        //      the peptides that were associated with a protein (and are dropped entirely when no
        //      peptides remain), and
        //   2. one new group per associated protein, holding the peptides matched to it.
        private SrmDocument CreateDocTree(SrmDocument current, List <KeyValuePair <FastaSequence, List <PeptideDocNode> > > proteinAssociations)
        {
            var resultGroups = new List<PeptideGroupDocNode>();

            // First pass: carry over the existing groups, stripping matched peptides where needed
            foreach (var group in current.MoleculeGroups)
            {
                // Existing proteins are kept exactly as they are
                if (group.PeptideGroup is FastaSequence)
                {
                    resultGroups.Add(group);
                    continue;
                }

                // Not a protein: keep only the peptides that no association claims
                var unmatched = group.Children
                    .Cast<PeptideDocNode>()
                    .Where(peptide => !proteinAssociations.Any(association => association.Value.Contains(peptide)))
                    .ToArray();

                if (unmatched.Length == group.Children.Count)
                {
                    // Nothing was removed, so the original node can be reused
                    resultGroups.Add(group);
                }
                else if (unmatched.Length > 0)
                {
                    // Some peptides moved away; keep the group with its remaining children
                    resultGroups.Add((PeptideGroupDocNode)group.ChangeChildren(unmatched));
                }
                // Otherwise every peptide was matched and the emptied group is left out
            }

            // Second pass: append one new group for each associated protein
            foreach (var association in proteinAssociations)
            {
                var protein = association.Key;
                var sourcePeptides = association.Value;
                var movedPeptides = new PeptideDocNode[sourcePeptides.Count];
                for (int i = 0; i < movedPeptides.Length; i++)
                {
                    movedPeptides[i] = ChangeFastaSequence(current.Settings, sourcePeptides[i], protein);
                }

                resultGroups.Add(new PeptideGroupDocNode(protein, protein.Name, protein.Description, movedPeptides));
            }

            return (SrmDocument)current.ChangeChildrenChecked(resultGroups.ToArray());
        }
Esempio n. 2
0
        /// <summary>
        /// Returns a copy of the document in which every child contained in
        /// PeptideGroupDocNodes has been replaced by the result of ExcludePeptides,
        /// while all other children are carried over unchanged.
        /// </summary>
        /// <param name="srmDocument">The document whose children are processed</param>
        /// <returns>The document with the replaced child list</returns>
        private SrmDocument ExcludePeptidesFromDocument(SrmDocument srmDocument)
        {
            var updatedChildren = new List<DocNode>();

            foreach (var child in srmDocument.Children)
            {
                if (PeptideGroupDocNodes.Contains(child))
                {
                    updatedChildren.Add(ExcludePeptides((PeptideGroupDocNode)child));
                }
                else
                {
                    updatedChildren.Add(child);
                }
            }

            return (SrmDocument)srmDocument.ChangeChildrenChecked(updatedChildren);
        }
Esempio n. 3
0
        /// <summary>
        /// Reduces a document to a set of nodes to preserve.  A node is removed unless
        /// it is listed in the set to preserve or contains a node that is listed.
        /// A preserved node that contains no other preserved nodes keeps all of its
        /// children.  The nodes are identified by their Id.GlobalIndex, and the actual
        /// pruning is delegated to the index-based RemoveAllBut overload.
        /// </summary>
        /// <param name="document">The document to be modified</param>
        /// <param name="preserveNodes">Nodes to preserve</param>
        /// <returns>A new copy of the document with preserved children, or an empty
        /// document, if nothing was preserved</returns>
        public static SrmDocument RemoveAllBut(this SrmDocument document, IEnumerable <DocNode> preserveNodes)
        {
            // Gather the global index of every node that must survive
            var indexesToKeep = new HashSet<int>();
            foreach (DocNode nodeToKeep in preserveNodes)
            {
                indexesToKeep.Add(nodeToKeep.Id.GlobalIndex);
            }

            // A null result means nothing was preserved; fall back to a document without children
            return (SrmDocument)(RemoveAllBut(document, indexesToKeep)
                                 ?? document.ChangeChildrenChecked(new DocNode[0]));
        }
        /// <summary>
        /// Resolves protein metadata for the peptide groups of <paramref name="docOrig"/> and
        /// returns a copy of the document with the resolved metadata applied.  Results are
        /// cached in _processedNodes, keyed by each node's Id.GlobalIndex, and the whole
        /// operation runs while holding a lock on that cache.
        /// Lookup order: the background proteome (only when one is set and it does not itself
        /// need a metadata search), then FastaImporter.DoWebserviceLookup for whatever remains.
        /// </summary>
        /// <param name="docOrig">The document whose peptide groups are examined</param>
        /// <param name="progressMonitor">Receives progress updates and is polled for cancellation</param>
        /// <returns>The updated document, or null if the operation was canceled</returns>
        private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor progressMonitor)
        {
            lock (_processedNodes)
            {
                // Check to make sure this operation was not canceled while this thread was
                // waiting to acquire the lock.  This also cleans up pending work.
                if (progressMonitor.IsCanceled)
                {
                    return(null);
                }

                IProgressStatus progressStatus = new ProgressStatus(Resources.ProteinMetadataManager_LookupProteinMetadata_resolving_protein_details);
                int             nResolved      = 0;
                // NOTE(review): Select(...).Count() counts every peptide group, not only those for
                // which NeedsSearch() is true, so this is the total group count.  Possibly
                // Count(predicate) was intended - confirm before changing, since nResolved below is
                // also incremented for cached nodes regardless of NeedsSearch().
                int             nUnresolved    = docOrig.PeptideGroups.Select(pg => pg.ProteinMetadata.NeedsSearch()).Count();

                if ((nUnresolved > 0) && !docOrig.Settings.PeptideSettings.BackgroundProteome.IsNone)
                {
                    // Do a quick check to see if background proteome already has the info
                    if (!docOrig.Settings.PeptideSettings.BackgroundProteome.NeedsProteinMetadataSearch)
                    {
                        try
                        {
                            using (var proteomeDb = docOrig.Settings.PeptideSettings.BackgroundProteome.OpenProteomeDb())
                            {
                                foreach (PeptideGroupDocNode nodePepGroup in docOrig.PeptideGroups)
                                {
                                    if (_processedNodes.ContainsKey(nodePepGroup.Id.GlobalIndex))
                                    {
                                        // We did this before we were interrupted
                                        // (nResolved++ is a post-increment, so the reported percentage
                                        // is computed from the count before this node is included)
                                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                    }
                                    else if (nodePepGroup.ProteinMetadata.NeedsSearch())
                                    {
                                        // Try the current name first, then fall back to the original
                                        // name and finally to the accession, stopping at the first hit
                                        var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name);
                                        if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName))
                                        {
                                            proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit
                                        }
                                        if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession))
                                        {
                                            proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit
                                        }
                                        if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch())
                                        {
                                            // Background proteome has already resolved this
                                            _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata);
                                            progressMonitor.UpdateProgress(
                                                progressStatus =
                                                    progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                        }
                                    }
                                    // Poll for cancellation once per group
                                    if (progressMonitor.IsCanceled)
                                    {
                                        progressMonitor.UpdateProgress(progressStatus.Cancel());
                                        return(null);
                                    }
                                }
                            }
                        }
                        // ReSharper disable once EmptyGeneralCatchClause
                        catch
                        {
                            // The protDB file is busy, or some other issue - just go directly to web
                            // (deliberately swallows every exception; entries already added to
                            // _processedNodes before the failure are kept)
                        }
                    }
                }
                if (nResolved != nUnresolved)
                {
                    try
                    {
                        // Now go to the web for more protein metadata (or pretend to, depending on WebEnabledFastaImporter.DefaultWebAccessMode)
                        // Maps each search request back to the document node it was created for
                        var docNodesWithUnresolvedProteinMetadata = new Dictionary <ProteinSearchInfo, PeptideGroupDocNode>();
                        var proteinsToSearch = new List <ProteinSearchInfo>();
                        foreach (PeptideGroupDocNode node in docOrig.PeptideGroups)
                        {
                            if (node.ProteinMetadata.NeedsSearch() && !_processedNodes.ContainsKey(node.Id.GlobalIndex)) // Did we already process this?
                            {
                                var proteinMetadata = node.ProteinMetadata;
                                if (proteinMetadata.WebSearchInfo.IsEmpty()) // Never even been hit with regex
                                {
                                    // Use Regexes to get some metadata, and a search term
                                    var parsedProteinMetaData = FastaImporter.ParseProteinMetaData(proteinMetadata);
                                    if ((parsedProteinMetaData == null) || Equals(parsedProteinMetaData.Merge(proteinMetadata), proteinMetadata.SetWebSearchCompleted()))
                                    {
                                        // That didn't parse well enough to make a search term, or didn't add any new info - just set it as searched so we don't keep trying
                                        _processedNodes.Add(node.Id.GlobalIndex, proteinMetadata.SetWebSearchCompleted());
                                        if (progressMonitor.IsCanceled)
                                        {
                                            progressMonitor.UpdateProgress(progressStatus.Cancel());
                                            return(null);
                                        }
                                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                        proteinMetadata = null;  // No search to be done
                                    }
                                    else
                                    {
                                        proteinMetadata = proteinMetadata.Merge(parsedProteinMetaData);  // Fill in any gaps with parsed info
                                    }
                                }
                                if (proteinMetadata != null)
                                {
                                    // We note the sequence length because it's useful in disambiguating search results
                                    proteinsToSearch.Add(new ProteinSearchInfo(new DbProteinName(null, proteinMetadata),
                                                                               node.PeptideGroup.Sequence == null ? 0 : node.PeptideGroup.Sequence.Length));
                                    // Last() is the request that was just appended above
                                    docNodesWithUnresolvedProteinMetadata.Add(proteinsToSearch.Last(), node);
                                }
                            }
                        }
                        if (progressMonitor.IsCanceled)
                        {
                            progressMonitor.UpdateProgress(progressStatus.Cancel());
                            return(null);
                        }
                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved / nUnresolved));

                        // Now we actually hit the internet
                        if (proteinsToSearch.Any())
                        {
                            foreach (var result in FastaImporter.DoWebserviceLookup(proteinsToSearch, progressMonitor, false)) // Resolve them all, now
                            {
                                Debug.Assert(!result.GetProteinMetadata().NeedsSearch());
                                // Cache the result under the GlobalIndex of the node the request came from
                                _processedNodes.Add(docNodesWithUnresolvedProteinMetadata[result].Id.GlobalIndex, result.GetProteinMetadata());
                                if (progressMonitor.IsCanceled)
                                {
                                    progressMonitor.UpdateProgress(progressStatus.Cancel());
                                    return(null);
                                }
                                progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                            }
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        // Treated the same as a cancellation detected by polling
                        progressMonitor.UpdateProgress(progressStatus.Cancel());
                        return(null);
                    }
                }

                // And finally write back to the document
                // (every group in MoleculeGroups is carried over; only those with a cached
                // result get their protein metadata replaced)
                var listProteins = new List <PeptideGroupDocNode>();
                foreach (PeptideGroupDocNode node in docOrig.MoleculeGroups)
                {
                    if (_processedNodes.ContainsKey(node.Id.GlobalIndex))
                    {
                        listProteins.Add(node.ChangeProteinMetadata(_processedNodes[node.Id.GlobalIndex]));
                    }
                    else
                    {
                        listProteins.Add(node);
                    }
                }
                var docNew = docOrig.ChangeChildrenChecked(listProteins.Cast <DocNode>().ToArray());
                progressMonitor.UpdateProgress(progressStatus.Complete());
                return((SrmDocument)docNew);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Applies the refinement options of this instance to a document and returns the
        /// refined copy.  Peptide groups are filtered by accepted protein, optionally switched
        /// to auto-managed children, refined individually, and dropped when they end up with
        /// fewer peptides than the configured minimum.  When duplicate peptide removal is on
        /// and duplicates were recorded, a second pass strips those peptides from every group.
        /// </summary>
        /// <param name="document">The document to refine</param>
        /// <param name="progressMonitor">Optional monitor notified for each group; may be null</param>
        /// <returns>The refined document</returns>
        public SrmDocument Refine(SrmDocument document, SrmSettingsChangeMonitor progressMonitor)
        {
            // Record the global index of every retention time outlier, when a threshold is set
            var outlierIds = new HashSet<int>();
            if (RTRegressionThreshold.HasValue)
            {
                // TODO: Move necessary code into Model.
                var outliers = RTLinearRegressionGraphPane.CalcOutliers(document,
                    RTRegressionThreshold.Value, RTRegressionPrecision, UseBestResult);

                foreach (var outlier in outliers)
                {
                    outlierIds.Add(outlier.Id.GlobalIndex);
                }
            }

            // The tracking sets stay null when the corresponding option is switched off
            HashSet<RefinementIdentity> includedPeptides = null;
            if (RemoveRepeatedPeptides)
            {
                includedPeptides = new HashSet<RefinementIdentity>();
            }

            HashSet<RefinementIdentity> repeatedPeptides = null;
            if (RemoveDuplicatePeptides)
            {
                repeatedPeptides = new HashSet<RefinementIdentity>();
            }

            // Map each accepted peptide to its accepted charges; a null list means no charge restriction
            Dictionary<RefinementIdentity, List<int>> acceptedPeptides = null;
            if (AcceptedPeptides != null)
            {
                acceptedPeptides = new Dictionary<RefinementIdentity, List<int>>();
                foreach (var accepted in AcceptedPeptides)
                {
                    var key = new RefinementIdentity(accepted.Sequence);
                    List<int> charges;
                    if (acceptedPeptides.TryGetValue(key, out charges))
                    {
                        // Already unrestricted: nothing more to record for this peptide
                        if (charges == null)
                        {
                            continue;
                        }

                        if (accepted.Charge.HasValue)
                        {
                            charges.Add(accepted.Charge.Value);
                        }
                        else
                        {
                            // An entry without a charge lifts the restriction entirely
                            acceptedPeptides[key] = null;
                        }
                    }
                    else
                    {
                        if (accepted.Charge.HasValue)
                        {
                            charges = new List<int> {accepted.Charge.Value};
                        }
                        else
                        {
                            charges = null;
                        }
                        acceptedPeptides.Add(key, charges);
                    }
                }
            }

            HashSet<string> acceptedProteins = null;
            if (AcceptedProteins != null)
            {
                acceptedProteins = new HashSet<string>(AcceptedProteins);
            }

            var refinedGroups = new List<PeptideGroupDocNode>();
            // Excluding proteins with too few peptides, since they can impact results
            // of the duplicate peptide check.
            int minPeptides = MinPeptidesPerProtein ?? 0;
            foreach (PeptideGroupDocNode group in document.Children)
            {
                if (progressMonitor != null)
                {
                    progressMonitor.ProcessGroup(group);
                }

                // Skip groups whose protein key is not in the accepted set, when one is given
                if (acceptedProteins != null && !acceptedProteins.Contains(GetAcceptProteinKey(group)))
                {
                    continue;
                }

                PeptideGroupDocNode refinedGroup = group;
                // If auto-managing all peptides, make sure this flag is set correctly,
                // and update the peptides list, if necessary.
                if (AutoPickPeptidesAll && group.AutoManageChildren == AutoPickChildrenOff)
                {
                    refinedGroup =
                        (PeptideGroupDocNode) refinedGroup.ChangeAutoManageChildren(!AutoPickChildrenOff);

                    var groupSettings = document.Settings;
                    if (!AutoPickChildrenOff && !groupSettings.PeptideSettings.Filter.AutoSelect)
                    {
                        groupSettings = groupSettings.ChangePeptideFilter(filter => filter.ChangeAutoSelect(true));
                    }

                    refinedGroup = refinedGroup.ChangeSettings(groupSettings,
                        new SrmSettingsDiff(true, false, false, false, false, false));
                }

                refinedGroup = Refine(refinedGroup, document, outlierIds,
                        includedPeptides, repeatedPeptides, acceptedPeptides, progressMonitor);

                if (refinedGroup.Children.Count >= minPeptides)
                {
                    refinedGroups.Add(refinedGroup);
                }
            }

            // Need a second pass, if all duplicate peptides should be removed,
            // and duplicates were found.
            if (repeatedPeptides != null && repeatedPeptides.Count > 0)
            {
                var filteredGroups = new List<PeptideGroupDocNode>();
                foreach (PeptideGroupDocNode group in refinedGroups)
                {
                    var keptPeptides = new List<PeptideDocNode>();
                    foreach (PeptideDocNode peptide in group.Children)
                    {
                        // Custom ions are identified by the ion, everything else by modified sequence
                        RefinementIdentity identity;
                        if (peptide.Peptide.IsCustomIon)
                        {
                            identity = new RefinementIdentity(peptide.Peptide.CustomIon);
                        }
                        else
                        {
                            identity = new RefinementIdentity(document.Settings.GetModifiedSequence(peptide));
                        }

                        if (repeatedPeptides.Contains(identity))
                        {
                            continue;
                        }

                        keptPeptides.Add(peptide);
                    }

                    var filteredGroup = (PeptideGroupDocNode)
                        group.ChangeChildrenChecked(keptPeptides.ToArray(), true);

                    // The minimum peptide count is enforced again after duplicates are gone
                    if (filteredGroup.Children.Count >= minPeptides)
                    {
                        filteredGroups.Add(filteredGroup);
                    }
                }

                refinedGroups = filteredGroups;
            }

            return (SrmDocument) document.ChangeChildrenChecked(refinedGroups.ToArray(), true);
        }
Esempio n. 6
0
        /// <summary>
        /// Resolves protein metadata for the peptide groups of <paramref name="docOrig"/> and
        /// returns a copy of the document with the resolved metadata applied.  Results are
        /// cached in _processedNodes, keyed by each node's Id.GlobalIndex, and the whole
        /// operation runs while holding a lock on that cache.
        /// Lookup order: the background proteome (only when one is set and it does not itself
        /// need a metadata search), then FastaImporter.DoWebserviceLookup for whatever remains.
        /// When writing back, a node whose name differs from the looked-up name but whose
        /// metadata is otherwise identical keeps its own metadata and only takes over the
        /// web search info.
        /// </summary>
        /// <param name="docOrig">The document whose peptide groups are examined</param>
        /// <param name="progressMonitor">Receives progress updates and is polled for cancellation</param>
        /// <returns>The updated document, or null if the operation was canceled or a call to
        /// UpdatePrecentComplete returned false</returns>
        private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor progressMonitor)
        {
            lock (_processedNodes)
            {
                // Check to make sure this operation was not canceled while this thread was
                // waiting to acquire the lock.  This also cleans up pending work.
                if (progressMonitor.IsCanceled)
                {
                    return(null);
                }

                IProgressStatus progressStatus = new ProgressStatus(Resources.ProteinMetadataManager_LookupProteinMetadata_resolving_protein_details);
                int             nResolved      = 0;
                // NOTE(review): Select(...).Count() counts every peptide group, not only those for
                // which NeedsSearch() is true, so this is the total group count.  Possibly
                // Count(predicate) was intended - confirm before changing, since nResolved below is
                // also incremented for cached nodes regardless of NeedsSearch().
                int             nUnresolved    = docOrig.PeptideGroups.Select(pg => pg.ProteinMetadata.NeedsSearch()).Count();

                if ((nUnresolved > 0) && !docOrig.Settings.PeptideSettings.BackgroundProteome.IsNone)
                {
                    // Do a quick check to see if background proteome already has the info
                    if (!docOrig.Settings.PeptideSettings.BackgroundProteome.NeedsProteinMetadataSearch)
                    {
                        try
                        {
                            using (var proteomeDb = docOrig.Settings.PeptideSettings.BackgroundProteome.OpenProteomeDb())
                            {
                                foreach (PeptideGroupDocNode nodePepGroup in docOrig.PeptideGroups)
                                {
                                    if (_processedNodes.ContainsKey(nodePepGroup.Id.GlobalIndex))
                                    {
                                        // We did this before we were interrupted
                                        nResolved++;
                                    }
                                    else if (nodePepGroup.ProteinMetadata.NeedsSearch())
                                    {
                                        // Try the current name first, then fall back to the original
                                        // name and finally to the accession, stopping at the first hit
                                        var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name);
                                        if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName))
                                        {
                                            proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit
                                        }
                                        if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession))
                                        {
                                            proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit
                                        }
                                        if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch())
                                        {
                                            // Background proteome has already resolved this
                                            _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata);
                                            nResolved++;
                                        }
                                    }

                                    // Report progress once per group; a false result aborts the lookup
                                    // (presumably signalling cancellation - the helper is not visible here)
                                    if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved / nUnresolved, ref progressStatus))
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                        // ReSharper disable once EmptyGeneralCatchClause
                        catch
                        {
                            // The protDB file is busy, or some other issue - just go directly to web
                            // (deliberately swallows every exception; entries already added to
                            // _processedNodes before the failure are kept)
                        }
                    }
                }
                if (nResolved != nUnresolved)
                {
                    try
                    {
                        // Now go to the web for more protein metadata (or pretend to, depending on WebEnabledFastaImporter.DefaultWebAccessMode)
                        // Maps each search request back to the document node it was created for
                        var docNodesWithUnresolvedProteinMetadata = new Dictionary <ProteinSearchInfo, PeptideGroupDocNode>();
                        var proteinsToSearch = new List <ProteinSearchInfo>();
                        foreach (PeptideGroupDocNode node in docOrig.PeptideGroups)
                        {
                            if (node.ProteinMetadata.NeedsSearch() && !_processedNodes.ContainsKey(node.Id.GlobalIndex)) // Did we already process this?
                            {
                                var proteinMetadata = node.ProteinMetadata;
                                if (proteinMetadata.WebSearchInfo.IsEmpty()) // Never even been hit with regex
                                {
                                    // Use Regexes to get some metadata, and a search term
                                    var parsedProteinMetaData = FastaImporter.ParseProteinMetaData(proteinMetadata);
                                    if ((parsedProteinMetaData == null) || Equals(parsedProteinMetaData.Merge(proteinMetadata), proteinMetadata.SetWebSearchCompleted()))
                                    {
                                        // That didn't parse well enough to make a search term, or didn't add any new info - just set it as searched so we don't keep trying
                                        _processedNodes.Add(node.Id.GlobalIndex, proteinMetadata.SetWebSearchCompleted());
                                        // nResolved++ is a post-increment, so the percentage passed here is
                                        // computed from the count before this node is included
                                        if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved++ / nUnresolved, ref progressStatus))
                                        {
                                            return(null);
                                        }
                                        proteinMetadata = null;  // No search to be done
                                    }
                                    else
                                    {
                                        proteinMetadata = proteinMetadata.Merge(parsedProteinMetaData);  // Fill in any gaps with parsed info
                                    }
                                }
                                if (proteinMetadata != null)
                                {
                                    // We note the sequence length because it's useful in disambiguating search results
                                    proteinsToSearch.Add(new ProteinSearchInfo(new DbProteinName(null, proteinMetadata),
                                                                               node.PeptideGroup.Sequence == null ? 0 : node.PeptideGroup.Sequence.Length));
                                    // Last() is the request that was just appended above
                                    docNodesWithUnresolvedProteinMetadata.Add(proteinsToSearch.Last(), node);
                                }
                            }
                        }

                        if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved / nUnresolved, ref progressStatus))
                        {
                            return(null);
                        }

                        // Now we actually hit the internet
                        if (proteinsToSearch.Any())
                        {
                            foreach (var result in FastaImporter.DoWebserviceLookup(proteinsToSearch, progressMonitor, false)) // Resolve them all, now
                            {
                                Assume.IsTrue(!result.GetProteinMetadata().NeedsSearch());
                                // Cache the result under the GlobalIndex of the node the request came from
                                _processedNodes.Add(docNodesWithUnresolvedProteinMetadata[result].Id.GlobalIndex, result.GetProteinMetadata());
                                if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved++ / nUnresolved, ref progressStatus))
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        // Treated as a cancellation: report it and return no document
                        progressMonitor.UpdateProgress(progressStatus.Cancel());
                        return(null);
                    }
                }

                // And finally write back to the document
                // (every group in MoleculeGroups is carried over; only those with a cached
                // result are candidates for a metadata change)
                var listProteins = new List <PeptideGroupDocNode>();
                foreach (PeptideGroupDocNode node in docOrig.MoleculeGroups)
                {
                    if (_processedNodes.TryGetValue(node.Id.GlobalIndex, out var proteinMetadata))
                    {
                        // Compare existing and proposed metadata, ignoring name difference in case user changed
                        // the name manually in the Targets tree while a background metadata lookup was going on, and
                        // ignoring web search details since the existing node probably hasn't any yet.
                        //
                        // This fixes issue https://skyline.ms/announcements/home/support/thread.view?rowId=49107 in which:
                        //    the user pasted a protein sequence into the Targets tree
                        //    then tried to type in a name to replace the default assigned name "sequence1"
                        //    after a few seconds the displayed name reverted to "sequence1" upon background protein metadata search completion
                        // N.B. as this is timing dependent, and our automated tests are mandated to not require internet
                        // access, writing a test for this fix (i.e. adding timings to the fake web lookup system) proved to
                        // be tricky and finally deemed not worth the effort for this fairly obscure problem.
                        if (!Equals(node.ProteinMetadata.Name, proteinMetadata.Name) &&                        // Different name
                            Equals(node.ProteinMetadata.ChangeName(proteinMetadata.Name).ClearWebSearchInfo(), // But otherwise identical
                                   proteinMetadata.ClearWebSearchInfo()))
                        {
                            // Leave (apparently user-renamed) node alone, and note the web search that was actually used.
                            listProteins.Add(node.ChangeProteinMetadata(node.ProteinMetadata.ChangeWebSearchInfo(proteinMetadata.WebSearchInfo)));
                        }
                        else
                        {
                            // Update the protein metadata for this node, if any
                            listProteins.Add(node.ChangeProteinMetadata(proteinMetadata));
                        }
                    }
                    else
                    {
                        // Not yet processed
                        listProteins.Add(node);
                    }
                }
                var docNew = docOrig.ChangeChildrenChecked(listProteins.Cast <DocNode>().ToArray());
                progressMonitor.UpdateProgress(progressStatus.Complete());
                return((SrmDocument)docNew);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Enumerate all document peptides. If a library peptide already exists in the
        /// current document, update the transition groups for that document peptide and
        /// remove the peptide from the list to add.
        /// </summary>
        /// <param name="document">The starting document</param>
        /// <param name="dictCopy">A dictionary of peptides to peptide matches. This dictionary is mutated:
        /// an entry is removed once its peptide has been merged into an existing document peptide (or filtered
        /// out as a duplicate), and is put back when the match still lists proteins and the filter is not
        /// FirstOccurence. The Proteins list of a match may also have the current protein removed.</param>
        /// <param name="toPath">Currently selected path.</param>
        /// <param name="selectedPath">Selected path after the nodes have been added. Starts out as
        /// <paramref name="toPath"/> and is only replaced when PeptideMatches holds exactly one entry.</param>
        /// <returns>A new document with precursors for existing peptides added</returns>
        private SrmDocument UpdateExistingPeptides(SrmDocument document,
                                                   Dictionary <PeptideSequenceModKey, PeptideMatch> dictCopy,
                                                   IdentityPath toPath, out IdentityPath selectedPath)
        {
            selectedPath = toPath;
            IList <DocNode> nodePepGroups = new List <DocNode>();

            foreach (PeptideGroupDocNode nodePepGroup in document.MoleculeGroups)
            {
                IList <DocNode> nodePeps = new List <DocNode>();
                foreach (PeptideDocNode nodePep in nodePepGroup.Children)
                {
                    var          key = nodePep.SequenceKey;
                    PeptideMatch peptideMatch;
                    // If this peptide is not in our list of peptides to add,
                    // or if we are in a peptide list and this peptide has been matched to protein(s),
                    // then we don't touch this particular node.
                    if (!dictCopy.TryGetValue(key, out peptideMatch) ||
                        (nodePepGroup.IsPeptideList &&
                         (peptideMatch.Proteins != null && peptideMatch.Proteins.Any())))
                    {
                        nodePeps.Add(nodePep);
                    }
                    else
                    {
                        var proteinName  = nodePepGroup.PeptideGroup.Name;
                        int indexProtein = -1;
                        if (peptideMatch.Proteins != null)
                        {
                            indexProtein =
                                peptideMatch.Proteins.IndexOf(protein => Equals(protein.ProteinMetadata.Name, proteinName));
                            // If the user has opted to filter duplicate peptides, remove this peptide from the list to
                            // add and continue.
                            if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.NoDuplicates && peptideMatch.Proteins.Count > 1)
                            {
                                dictCopy.Remove(key);
                                nodePeps.Add(nodePep);
                                continue;
                            }
                            // [1] If this protein is not the first match, and the user has opted to add only the first occurrence,
                            // [2] or if this protein is not one of the matches, and [2a] we are either not in a peptide list
                            // [2b] or the user has opted to filter unmatched peptides, ignore this particular node.
                            if ((indexProtein > 0 && FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.FirstOccurence) ||
                                (indexProtein == -1 &&
                                 (!nodePepGroup.IsPeptideList || !Properties.Settings.Default.LibraryPeptidesAddUnmatched)))
                            {
                                nodePeps.Add(nodePep);
                                continue;
                            }
                        }
                        // Update the children of the peptide in the document to include the charge state of the peptide we are adding.
                        PeptideDocNode nodePepMatch      = peptideMatch.NodePep;
                        PeptideDocNode nodePepSettings   = null;
                        var            newChildren       = nodePep.Children.ToList();
                        // Default selection target is the first existing child; it is replaced by the
                        // most recently added precursor below.
                        Identity       nodeGroupChargeId = newChildren.Count > 0 ? newChildren[0].Id : null;
                        foreach (TransitionGroupDocNode nodeGroup in nodePepMatch.Children)
                        {
                            var chargeGroup = nodeGroup.TransitionGroup.PrecursorAdduct;
                            if (nodePep.HasChildCharge(chargeGroup))
                            {
                                // The document peptide already has this precursor; count it as skipped.
                                SkippedPeptideCount++;
                            }
                            else
                            {
                                // Apply the document settings to the library peptide lazily, at most once per peptide.
                                if (nodePepSettings == null)
                                {
                                    nodePepSettings = nodePepMatch.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
                                }
                                TransitionGroupDocNode nodeGroupCharge = (TransitionGroupDocNode)nodePepSettings.FindNode(nodeGroup.TransitionGroup);
                                if (nodeGroupCharge == null)
                                {
                                    continue;
                                }
                                if (peptideMatch.Proteins != null && peptideMatch.Proteins.Count > 1)
                                {
                                    // If we may be adding this specific node to the document more than once, create a copy of it so that
                                    // we don't have two nodes with the same global id.
                                    nodeGroupCharge = (TransitionGroupDocNode)nodeGroupCharge.CopyId();
                                    nodeGroupCharge = (TransitionGroupDocNode)nodeGroupCharge.ChangeChildren(
                                        nodeGroupCharge.Children.ToList().ConvertAll(child => child.CopyId()));
                                }
                                nodeGroupChargeId = nodeGroupCharge.Id;
                                newChildren.Add(nodeGroupCharge);
                            }
                        }
                        // Sort the new peptide children.
                        newChildren.Sort(Peptide.CompareGroups);
                        var nodePepAdd = nodePep.ChangeChildrenChecked(newChildren);
                        // If we have changed the children, need to set automanage children to false.
                        if (nodePep.AutoManageChildren && !ReferenceEquals(nodePep, nodePepAdd))
                        {
                            nodePepAdd = nodePepAdd.ChangeAutoManageChildren(false);
                        }
                        // Change the selected path.
                        if (PeptideMatches.Count == 1)
                        {
                            selectedPath = nodeGroupChargeId == null
                                                ? new IdentityPath(new[] { nodePepGroup.Id, nodePepAdd.Id })
                                                : new IdentityPath(new[] { nodePepGroup.Id, nodePepAdd.Id, nodeGroupChargeId });
                        }
                        nodePeps.Add(nodePepAdd);
                        // Remove this peptide from the list of peptides we need to add to the document
                        dictCopy.Remove(key);
                        if (peptideMatch.Proteins != null)
                        {
                            if (indexProtein != -1)
                            {
                                // Remove this protein from the list of proteins associated with the peptide.
                                peptideMatch.Proteins.RemoveAt(indexProtein);
                            }
                            // If this peptide has not yet been added to all matched proteins,
                            // put it back in the list of peptides to add.
                            if (peptideMatch.Proteins.Count != 0 && FilterMultipleProteinMatches != BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
                            {
                                dictCopy.Add(key, peptideMatch);
                            }
                        }
                    }
                }
                nodePepGroups.Add(nodePepGroup.ChangeChildrenChecked(nodePeps));
            }
            return((SrmDocument)document.ChangeChildrenChecked(nodePepGroups));
        }
        /// <summary>
        /// Resolves protein metadata for the peptide groups of a document that still need a search.
        /// The background proteome is consulted first (when it is set and does not itself need a
        /// metadata search); anything still unresolved is then passed to
        /// FastaImporter.DoWebserviceLookup. Results are cached in _processedNodes, keyed by the
        /// node's global index, and finally written back into a new document.
        /// </summary>
        /// <param name="docOrig">The document whose peptide groups are examined</param>
        /// <param name="progressMonitor">Receives progress updates and is polled for cancellation</param>
        /// <returns>A document with the resolved protein metadata applied, or null if the operation was canceled</returns>
        private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor progressMonitor)
        {
            lock (_processedNodes)
            {
                // Check to make sure this operation was not canceled while this thread was
                // waiting to acquire the lock.  This also cleans up pending work.
                if (progressMonitor.IsCanceled)
                    return null;

                var progressStatus = new ProgressStatus(Resources.ProteinMetadataManager_LookupProteinMetadata_resolving_protein_details);
                int nResolved = 0;
                // Count only the groups that actually need a search. (Select(...).Count() would count
                // every group, because Select projects rather than filters.)
                int nUnresolved = docOrig.PeptideGroups.Count(pg => pg.ProteinMetadata.NeedsSearch());

                if ((nUnresolved > 0) && !docOrig.Settings.PeptideSettings.BackgroundProteome.IsNone)
                {
                    // Do a quick check to see if background proteome already has the info
                    if (!docOrig.Settings.PeptideSettings.BackgroundProteome.NeedsProteinMetadataSearch)
                    {
                        try
                        {
                            using (var proteomeDb = docOrig.Settings.PeptideSettings.BackgroundProteome.OpenProteomeDb())
                            {
                                foreach (PeptideGroupDocNode nodePepGroup in docOrig.PeptideGroups)
                                {
                                    if (_processedNodes.ContainsKey(nodePepGroup.Id.GlobalIndex))
                                    {
                                        // We did this before we were interrupted. Only count it when the node is
                                        // part of nUnresolved, so that nResolved can never exceed nUnresolved.
                                        if (nodePepGroup.ProteinMetadata.NeedsSearch())
                                        {
                                            progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                        }
                                    }
                                    else if (nodePepGroup.ProteinMetadata.NeedsSearch())
                                    {
                                        var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name);
                                        if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName))
                                            proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit
                                        if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession))
                                            proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit
                                        if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch())
                                        {
                                            // Background proteome has already resolved this
                                            _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata);
                                            progressMonitor.UpdateProgress(
                                                progressStatus =
                                                    progressStatus.ChangePercentComplete(100*nResolved++/nUnresolved));
                                        }
                                    }
                                    if (progressMonitor.IsCanceled)
                                    {
                                        progressMonitor.UpdateProgress(progressStatus.Cancel());
                                        return null;
                                    }
                                }
                            }
                        }
                        // ReSharper disable once EmptyGeneralCatchClause
                        catch
                        {
                            // The protDB file is busy, or some other issue - just go directly to web
                        }
                    }
                }
                // Skip the web phase entirely when everything that needed a search has been resolved
                // (this also covers the case where nothing needed a search: 0 == 0).
                if (nResolved != nUnresolved)
                {
                    try
                    {
                        // Now go to the web for more protein metadata (or pretend to, depending on WebEnabledFastaImporter.DefaultWebAccessMode)
                        var docNodesWithUnresolvedProteinMetadata = new Dictionary<ProteinSearchInfo,PeptideGroupDocNode>();
                        var proteinsToSearch = new List<ProteinSearchInfo>();
                        foreach (PeptideGroupDocNode node in docOrig.PeptideGroups)
                        {
                            if (node.ProteinMetadata.NeedsSearch() && !_processedNodes.ContainsKey(node.Id.GlobalIndex)) // Did we already process this?
                            {
                                var proteinMetadata = node.ProteinMetadata;
                                if (proteinMetadata.WebSearchInfo.IsEmpty()) // Never even been hit with regex
                                {
                                    // Use Regexes to get some metadata, and a search term
                                    var parsedProteinMetaData = FastaImporter.ParseProteinMetaData(proteinMetadata);
                                    if ((parsedProteinMetaData == null) || Equals(parsedProteinMetaData.Merge(proteinMetadata),proteinMetadata.SetWebSearchCompleted()))
                                    {
                                        // That didn't parse well enough to make a search term, or didn't add any new info - just set it as searched so we don't keep trying
                                        _processedNodes.Add(node.Id.GlobalIndex, proteinMetadata.SetWebSearchCompleted());
                                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                                        proteinMetadata = null;  // No search to be done
                                    }
                                    else
                                    {
                                        proteinMetadata = proteinMetadata.Merge(parsedProteinMetaData);  // Fill in any gaps with parsed info
                                    }
                                }
                                if (proteinMetadata != null)
                                {
                                    // We note the sequence length because it's useful in disambiguating search results
                                    proteinsToSearch.Add(new ProteinSearchInfo(new DbProteinName(null, proteinMetadata),
                                        node.PeptideGroup.Sequence == null ? 0 : node.PeptideGroup.Sequence.Length));
                                    // Remember which document node each search request belongs to
                                    docNodesWithUnresolvedProteinMetadata.Add(proteinsToSearch.Last(), node);
                                }
                            }
                        }
                        if (progressMonitor.IsCanceled)
                        {
                            progressMonitor.UpdateProgress(progressStatus.Cancel());
                            return null;
                        }
                        progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved / nUnresolved));

                        // Now we actually hit the internet
                        if (proteinsToSearch.Any())
                        {
                            foreach (var result in FastaImporter.DoWebserviceLookup(proteinsToSearch, progressMonitor, false)) // Resolve them all, now
                            {
                                Debug.Assert(!result.GetProteinMetadata().NeedsSearch());
                                _processedNodes.Add(docNodesWithUnresolvedProteinMetadata[result].Id.GlobalIndex, result.GetProteinMetadata());
                                progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(100 * nResolved++ / nUnresolved));
                            }
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        progressMonitor.UpdateProgress(progressStatus.Cancel());
                        return null;
                    }

                }

                // And finally write back to the document
                var listProteins = new List<PeptideGroupDocNode>();
                foreach (PeptideGroupDocNode node in docOrig.MoleculeGroups)
                {
                    if (_processedNodes.ContainsKey(node.Id.GlobalIndex))
                    {
                        listProteins.Add(node.ChangeProteinMetadata(_processedNodes[node.Id.GlobalIndex]));
                    }
                    else
                    {
                        // Not processed - keep the node unchanged
                        listProteins.Add(node);
                    }
                }
                var docNew = docOrig.ChangeChildrenChecked(listProteins.Cast<DocNode>().ToArray());
                progressMonitor.UpdateProgress(progressStatus.Complete());
                return (SrmDocument)docNew;
            }
        }
 /// <summary>
 /// Walks every peptide in the document and, where the same peptide is also present in the
 /// set of library matches, merges the library precursors into the existing document peptide
 /// instead of leaving the match to be added as a new peptide.
 /// </summary>
 /// <param name="document">The starting document</param>
 /// <param name="dictCopy">Peptide matches keyed by peptide. Matches that get merged (or filtered
 /// out as duplicates) are removed; a match is re-inserted when it still lists proteins and the
 /// filter is not FirstOccurence</param>
 /// <param name="toPath">Currently selected path.</param>
 /// <param name="selectedPath">Selected path after the nodes have been added</param>
 /// <returns>A new document with precursors for existing peptides added</returns>
 private SrmDocument UpdateExistingPeptides(SrmDocument document,
     Dictionary<PeptideSequenceModKey, PeptideMatch> dictCopy,
     IdentityPath toPath, out IdentityPath selectedPath)
 {
     selectedPath = toPath;
     IList<DocNode> updatedGroups = new List<DocNode>();
     foreach (PeptideGroupDocNode nodePepGroup in document.PeptideGroups)
     {
         IList<DocNode> updatedPeptides = new List<DocNode>();
         foreach (PeptideDocNode nodePep in nodePepGroup.Children)
         {
             var sequenceKey = nodePep.SequenceKey;
             PeptideMatch match;

             // Leave the node untouched when it is not among the peptides to add, or when it
             // sits in a peptide list but the match has been associated with protein(s).
             bool isCandidate = dictCopy.TryGetValue(sequenceKey, out match);
             if (!isCandidate ||
                 (nodePepGroup.IsPeptideList && match.Proteins != null && match.Proteins.Any()))
             {
                 updatedPeptides.Add(nodePep);
                 continue;
             }

             var groupName = nodePepGroup.PeptideGroup.Name;
             int proteinIndex = -1;
             if (match.Proteins != null)
             {
                 proteinIndex =
                     match.Proteins.IndexOf(candidate => Equals(candidate.ProteinMetadata.Name, groupName));

                 // Duplicate filtering requested: drop the match from the list to add and keep the node as is.
                 if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.NoDuplicates &&
                     match.Proteins.Count > 1)
                 {
                     dictCopy.Remove(sequenceKey);
                     updatedPeptides.Add(nodePep);
                     continue;
                 }

                 // Only the first occurrence is wanted and this group is a later match: ignore this node.
                 if (proteinIndex > 0 &&
                     FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
                 {
                     updatedPeptides.Add(nodePep);
                     continue;
                 }

                 // This group is not one of the matched proteins, and either it is not a peptide list
                 // or unmatched peptides are not to be added: ignore this node.
                 if (proteinIndex == -1 &&
                     !(nodePepGroup.IsPeptideList && Properties.Settings.Default.LibraryPeptidesAddUnmatched))
                 {
                     updatedPeptides.Add(nodePep);
                     continue;
                 }
             }

             // Extend the document peptide's children with the charge states of the library peptide.
             PeptideDocNode libraryPeptide = match.NodePep;
             PeptideDocNode libraryPeptideWithSettings = null;
             var mergedChildren = nodePep.Children.ToList();
             Identity selectedGroupId = null;
             if (mergedChildren.Count > 0)
             {
                 selectedGroupId = mergedChildren[0].Id;
             }

             foreach (TransitionGroupDocNode libraryGroup in libraryPeptide.Children)
             {
                 int precursorCharge = libraryGroup.TransitionGroup.PrecursorCharge;
                 if (nodePep.HasChildCharge(precursorCharge))
                 {
                     // Charge state already present on the document peptide.
                     SkippedPeptideCount++;
                     continue;
                 }

                 // The document settings are applied to the library peptide at most once, on first need.
                 if (libraryPeptideWithSettings == null)
                 {
                     libraryPeptideWithSettings = libraryPeptide.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
                 }

                 TransitionGroupDocNode groupToAdd =
                     (TransitionGroupDocNode) libraryPeptideWithSettings.FindNode(libraryGroup.TransitionGroup);
                 if (groupToAdd == null)
                 {
                     continue;
                 }

                 if (match.Proteins != null && match.Proteins.Count > 1)
                 {
                     // The same node might be inserted more than once, so give the group and each
                     // of its children fresh ids to avoid two nodes sharing a global id.
                     groupToAdd = (TransitionGroupDocNode) groupToAdd.CopyId();
                     groupToAdd = (TransitionGroupDocNode) groupToAdd.ChangeChildren(
                         groupToAdd.Children.ToList().ConvertAll(child => child.CopyId()));
                 }

                 selectedGroupId = groupToAdd.Id;
                 mergedChildren.Add(groupToAdd);
             }

             // Put the merged children back in order.
             mergedChildren.Sort(Peptide.CompareGroups);
             var mergedPeptide = nodePep.ChangeChildrenChecked(mergedChildren);

             // Once the children have changed, they must no longer be auto-managed.
             if (nodePep.AutoManageChildren && !ReferenceEquals(nodePep, mergedPeptide))
             {
                 mergedPeptide = mergedPeptide.ChangeAutoManageChildren(false);
             }

             // With a single peptide match, move the selection to the updated node.
             if (PeptideMatches.Count == 1)
             {
                 if (selectedGroupId == null)
                 {
                     selectedPath = new IdentityPath(new[] { nodePepGroup.Id, mergedPeptide.Id });
                 }
                 else
                 {
                     selectedPath = new IdentityPath(new[] { nodePepGroup.Id, mergedPeptide.Id, selectedGroupId });
                 }
             }

             updatedPeptides.Add(mergedPeptide);

             // This peptide no longer needs to be added to the document...
             dictCopy.Remove(sequenceKey);
             if (match.Proteins != null)
             {
                 if (proteinIndex != -1)
                 {
                     // ...and this protein is done for the peptide.
                     match.Proteins.RemoveAt(proteinIndex);
                 }

                 // Other matched proteins are still pending: put the peptide back in the list to add.
                 if (match.Proteins.Count != 0 &&
                     FilterMultipleProteinMatches != BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
                 {
                     dictCopy.Add(sequenceKey, match);
                 }
             }
         }
         updatedGroups.Add(nodePepGroup.ChangeChildrenChecked(updatedPeptides));
     }
     return (SrmDocument) document.ChangeChildrenChecked(updatedGroups);
 }