Exemple #1
0
        /// <summary>
        /// Reduces the detailed appearance information from <see cref="GetPeptidesAppearances"/> to
        /// simple protein / gene / species counts for each peptide.
        /// </summary>
        /// <param name="peptidesOfInterest">peptides to look up; passed through to <see cref="GetPeptidesAppearances"/></param>
        /// <param name="enzyme">passed through to <see cref="GetPeptidesAppearances"/></param>
        /// <param name="settings">passed through to <see cref="GetPeptidesAppearances"/></param>
        /// <param name="progressMonitor">passed through to <see cref="GetPeptidesAppearances"/></param>
        /// <returns>counts keyed by a <see cref="Target"/> built from each sequence string,
        /// or null when <see cref="GetPeptidesAppearances"/> returns null</returns>
        public Dictionary <Target, DigestionPeptideStats> GetPeptidesAppearanceCounts(Dictionary <Target, bool> peptidesOfInterest, Enzyme enzyme, PeptideSettings settings, SrmSettingsChangeMonitor progressMonitor)
        {
            var detailedBySequence = GetPeptidesAppearances(peptidesOfInterest, enzyme, settings, progressMonitor);
            if (detailedBySequence == null)
            {
                return null; // Cancelled
            }

            var countsByTarget = new Dictionary<Target, DigestionPeptideStats>();
            foreach (var entry in detailedBySequence)
            {
                var detail = entry.Value;
                // Add (rather than the indexer) so a duplicate key still throws, as ToDictionary would
                countsByTarget.Add(new Target(entry.Key),
                                   new DigestionPeptideStats(detail.Proteins.Count, detail.Genes.Count, detail.Species.Count));
            }
            return countsByTarget;
        }
        /// <summary>
        /// Flags each peptide in <paramref name="sequences"/> as unique (true) or not (false) according to the
        /// uniqueness constraint in the peptide filter settings. The dictionary is updated in place.
        /// </summary>
        /// <param name="sequences">peptides to evaluate; every value is expected to be true on entry</param>
        /// <param name="peptideSettings">supplies the uniqueness constraint and the background proteome</param>
        /// <param name="progressMonitor">passed through to the cache lookup</param>
        /// <returns>
        /// <paramref name="sequences"/> itself (unchanged when no constraint is set or there is no background proteome),
        /// or a new empty dictionary when the uniqueness lookup returns null (cancelled)
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">the constraint is not one of none, protein, gene or species</exception>
        public Dictionary <Target, bool> PeptidesUniquenessFilter(Dictionary <Target, bool> sequences, PeptideSettings peptideSettings, SrmSettingsChangeMonitor progressMonitor)
        {
            var constraint = peptideSettings.Filter.PeptideUniqueness;

            Assume.IsTrue(sequences.All(s => s.Value));  // Caller should seed this with all true

            bool constraintActive = constraint != PeptideFilter.PeptideUniquenessConstraint.none;
            if (!constraintActive ||
                peptideSettings.BackgroundProteome == null ||
                peptideSettings.BackgroundProteome.IsNone)
            {
                return sequences;  // No filtering
            }

            lock (_cache)
            {
                var statsByPeptide = _cache.GetUniquenessDict(peptideSettings, sequences, progressMonitor);
                if (statsByPeptide == null)
                {
                    return new Dictionary<Target, bool>();  // Cancelled
                }

                // Snapshot the keys because the values are reassigned while iterating
                foreach (var peptide in sequences.Keys.ToArray())
                {
                    DigestionPeptideStats stats;
                    if (!statsByPeptide.TryGetValue(peptide, out stats))
                    {
                        continue; // No information for this peptide: leave its flag alone
                    }

                    bool unique;
                    if (constraint == PeptideFilter.PeptideUniquenessConstraint.protein)
                    {
                        unique = stats.Proteins <= 1;
                    }
                    else if (constraint == PeptideFilter.PeptideUniquenessConstraint.gene)
                    {
                        unique = stats.Genes <= 1;
                    }
                    else if (constraint == PeptideFilter.PeptideUniquenessConstraint.species)
                    {
                        unique = stats.Species <= 1;
                    }
                    else
                    {
                        throw new ArgumentOutOfRangeException(nameof(peptideSettings));
                    }
                    sequences[peptide] = unique;
                }
            }
            return sequences;
        }
            /// <summary>
            /// Returns the cached peptide uniqueness dictionary, rebuilding or extending it first when the enzyme
            /// name differs from the one used for the cached digest or when a requested peptide is missing from it.
            /// </summary>
            /// <param name="peptideSettings">supplies the enzyme, and is passed on to the digestion when a refresh is needed</param>
            /// <param name="peptidesOfInterest">
            /// peptides that must be present in the result. NOTE: when a refresh is needed, every peptide already in
            /// the cache is added to this dictionary (with value true), so the caller's instance is modified.
            /// </param>
            /// <param name="progressMonitor">cancellation checker</param>
            /// <returns>the (possibly refreshed) uniqueness dictionary, or null if cancelled</returns>
            public Dictionary <Target, DigestionPeptideStats> GetUniquenessDict(PeptideSettings peptideSettings, Dictionary <Target, bool> peptidesOfInterest, SrmSettingsChangeMonitor progressMonitor)
            {
                var enzyme = peptideSettings.Enzyme;

                // The cache can be used as is when it was digested with this enzyme and covers every requested peptide
                bool enzymeChanged = enzyme.Name != _enzymeNameForPeptideUniquenessDictDigest;
                if (!enzymeChanged && peptidesOfInterest.Keys.All(pep => _peptideUniquenessDict.ContainsKey(pep)))
                {
                    return _peptideUniquenessDict;  // No change needed
                }

                if (!_parent.UpdateProgressAndCheckForCancellation(progressMonitor, 0))
                {
                    return null;  // Cancelled
                }

                // Peptides that were of interest in earlier calls are likely to be asked about again,
                // so carry them along in this digestion even though nobody asked for them right now
                foreach (var knownPeptide in _peptideUniquenessDict.Keys)
                {
                    if (!peptidesOfInterest.ContainsKey(knownPeptide))
                    {
                        peptidesOfInterest.Add(knownPeptide, true);
                    }
                }

                var freshCounts = _parent.GetPeptidesAppearanceCounts(peptidesOfInterest, enzyme, peptideSettings, progressMonitor);
                if (freshCounts == null)
                {
                    return null; // Cancelled
                }

                // Same enzyme: start from a copy of the current cache. Different enzyme: start empty.
                _peptideUniquenessDict = Equals(enzyme.Name, _enzymeNameForPeptideUniquenessDictDigest)
                    ? _peptideUniquenessDict.ToDictionary(s => s.Key, s => s.Value)
                    : new Dictionary<Target, DigestionPeptideStats>();

                // Indexer assignment adds new keys and overwrites existing ones
                foreach (var fresh in freshCounts)
                {
                    _peptideUniquenessDict[fresh.Key] = fresh.Value;
                }
                _enzymeNameForPeptideUniquenessDictDigest = enzyme.Name;

                if (!_parent.UpdateProgressAndCheckForCancellation(progressMonitor, 100))
                {
                    return null;  // Cancelled
                }
                return _peptideUniquenessDict;
            }
        // N.B. leaving this level of indirection in place as it will be useful in speeding up the Unique Peptides dialog
        /// <summary>
        /// Examine the background proteome for uniqueness information about the peptides of interest.
        /// Proteins are read from the proteome database and digested in parallel, in batches.
        /// </summary>
        /// <param name="peptidesOfInterest">this is a dict instead of a list only because upstream callers have already prepared this, which can be large and expensive to construct</param>
        /// <param name="enzyme">how we digest</param>
        /// <param name="settings">details like max missed cleavages</param>
        /// <param name="progressMonitor">cancellation checker; consulted at the start of every batch</param>
        /// <returns>
        /// a dictionary keyed by peptide sequence string holding the proteins, genes and species each peptide was found in;
        /// null when <c>DatabasePath</c> is null or empty, or when cancellation is reported
        /// </returns>
        public Dictionary <string, DigestionPeptideStatsDetailed> GetPeptidesAppearances(
            Dictionary <Target, bool> peptidesOfInterest, Enzyme enzyme, PeptideSettings settings, SrmSettingsChangeMonitor progressMonitor)
        {
            if (string.IsNullOrEmpty(DatabasePath))
            {
                return null;
            }
            var results = peptidesOfInterest.ToDictionary(pep => pep.Key.Sequence, pep => new DigestionPeptideStatsDetailed());

            if (results.Count == 0)
            {
                return results;
            }
            var       protease         = new ProteaseImpl(enzyme);
            var       maxPeptideLength = peptidesOfInterest.Max(p => p.Key.Sequence.Length); // No interest in any peptide longer than the longest one of interest
            const int DIGEST_CHUNKSIZE = 1000;                                               // Check for cancel every N proteins
            var       proteinCount     = 0;

            using (var proteomeDb = OpenProteomeDb())
            {
                var goal                = Math.Max(proteomeDb.GetProteinCount(), 1);
                var batchCount          = 0;
                var minimalProteinInfos = new ProteomeDb.MinimalProteinInfo[DIGEST_CHUNKSIZE];

                // Digests the first "count" proteins of the batch buffer in parallel and records every
                // peptide of interest that turns up. The results dictionary is only read concurrently;
                // each per-peptide stats object is locked while it is updated.
                Action<int> digestBatch = count =>
                {
                    ParallelEx.For(0, count, ii =>
                    {
                        var protein = minimalProteinInfos[ii];
                        foreach (var peptide in
                                 protease.DigestSequence(protein.Sequence, settings.DigestSettings.MaxMissedCleavages, maxPeptideLength))
                        {
                            DigestionPeptideStatsDetailed appearances;
                            if (results.TryGetValue(peptide.Sequence, out appearances))
                            {
                                lock (appearances)
                                {
                                    appearances.Proteins.Add(protein.Id);
                                    appearances.Genes.Add(protein.Gene);      // HashSet eliminates duplicates
                                    appearances.Species.Add(protein.Species); // HashSet eliminates duplicates
                                }
                            }
                        }
                    });
                };

                foreach (var minimalProteinInfo in proteomeDb.GetMinimalProteinInfo()) // Get list of sequence, proteinID, gene, species from the protdb file
                {
                    // Report progress and check for cancel at the start of each batch. This must happen
                    // BEFORE batchCount is incremented: testing "batchCount == 0" after the increment can
                    // never succeed, which left the operation impossible to cancel.
                    if (batchCount == 0)
                    {
                        var pct = Math.Max(1, 100 * proteinCount / goal); // Show at least a little progress at start to give user hope
                        if (!UpdateProgressAndCheckForCancellation(progressMonitor, pct))
                        {
                            return null;
                        }
                    }
                    proteinCount++;

                    minimalProteinInfos[batchCount++] = minimalProteinInfo;

                    // A null entry triggers a flush of whatever has been collected (the null itself is dropped
                    // from the batch); otherwise flush when the buffer is full
                    if (((minimalProteinInfo == null) && --batchCount > 0) || batchCount == DIGEST_CHUNKSIZE)
                    {
                        digestBatch(batchCount);
                        batchCount = 0;
                    }
                }

                // Don't lose a trailing partial batch if the enumeration ended without a null entry
                if (batchCount > 0)
                {
                    digestBatch(batchCount);
                }
            }
            return results;
        }
Exemple #5
0
        /// <summary>
        /// Lazily enumerates the peptide nodes available for this node under the given settings.
        /// </summary>
        /// <param name="settings">document settings used to generate and filter peptides</param>
        /// <param name="useFilter">true to apply the settings as a peptide filter, false to generate unfiltered</param>
        /// <param name="monitor">optional; checked for cancellation before each child or generated node is processed</param>
        /// <returns>the peptide nodes, produced one at a time as the sequence is enumerated</returns>
        /// <exception cref="OperationCanceledException">the monitor reports cancellation during enumeration</exception>
        public IEnumerable <PeptideDocNode> GetPeptideNodes(SrmSettings settings, bool useFilter, SrmSettingsChangeMonitor monitor = null)
        {
            // Case 1: a FASTA sequence can generate a comprehensive list of available peptides.
            var fasta = Id as FastaSequence;
            if (fasta != null)
            {
                foreach (PeptideDocNode generated in fasta.CreatePeptideDocNodes(settings, useFilter, null))
                {
                    if (monitor != null && monitor.IsCanceled())
                    {
                        throw new OperationCanceledException();
                    }
                    yield return generated;
                }
                yield break;
            }

            // Case 2: a peptide list with no variable modifications in the settings just returns
            // those existing children that carry no variable modifications.
            if (!settings.PeptideSettings.Modifications.HasVariableModifications)
            {
                foreach (PeptideDocNode child in Children)
                {
                    if (monitor != null && monitor.IsCanceled())
                    {
                        throw new OperationCanceledException();
                    }
                    if (child.HasVariableMods)
                    {
                        continue;
                    }
                    yield return child;
                }
                yield break;
            }

            // Case 3: variable modifications are in play, so fill out the available list.
            IPeptideFilter filter   = (useFilter ? settings : PeptideFilter.UNFILTERED);
            var            expanded = new HashSet<Peptide>(); // peptides whose variants have already been generated
            foreach (PeptideDocNode child in Children)
            {
                if (monitor != null && monitor.IsCanceled())
                {
                    throw new OperationCanceledException();
                }

                // Custom molecules (modifications mean nothing to custom ions // TODO(bspratt) but static isotope labels do?)
                // and explicitly modified nodes without variable mods are passed through untouched.
                if (child.Peptide.IsCustomMolecule || (child.HasExplicitMods && !child.HasVariableMods))
                {
                    yield return child;
                    continue;
                }

                if (expanded.Contains(child.Peptide))
                {
                    continue;
                }

                // The generated nodes use the Id of the unmodified instance of this peptide. If this
                // node is explicitly modified that would put two nodes with the same Id in the tree,
                // so generate from a copy instead.
                var source = child.Peptide;
                if (child.HasExplicitMods)
                {
                    source = (Peptide)source.Copy();
                }

                int produced = 0;
                foreach (PeptideDocNode variant in source.CreateDocNodes(settings, filter))
                {
                    yield return variant;
                    produced++;
                }

                // Make sure the peptide is not removed due to filtering
                if (produced == 0)
                {
                    yield return child;
                }
                expanded.Add(child.Peptide);
            }
        }
        /// <summary>
        /// Collects, for every molecule / precursor / annotation combination in the document, the per-replicate
        /// peak areas (raw and normalized according to the settings) and groups the outcome into <c>Data</c>.
        /// </summary>
        /// <param name="document">document to analyze; when null or without results the constructor returns
        /// before <c>_internalData</c> and <c>Data</c> are assigned</param>
        /// <param name="settings">refinement settings (grouping, normalization, MS level, cutoffs, ...)</param>
        /// <param name="token">checked for cancellation for every replicate visited</param>
        /// <param name="progressMonitor">optional; notified for each molecule and checked for cancellation for every replicate visited</param>
        /// <exception cref="OperationCanceledException">cancellation was requested through the token or the progress monitor</exception>
        public AreaCVRefinementData(SrmDocument document, AreaCVRefinementSettings settings,
                                    CancellationToken token, SrmSettingsChangeMonitor progressMonitor = null)
        {
            _settings = settings;
            if (document == null || !document.Settings.HasResults)
            {
                return;
            }

            var replicates  = document.MeasuredResults.Chromatograms.Count;
            var areas       = new List <AreaInfo>(replicates);
            var annotations = AnnotationHelper.GetPossibleAnnotations(document, settings.Group).ToArray();

            // With no grouping and no annotations, use a single null annotation so the loops below run once.
            // (This statement previously had its semicolon outside the braces, which is not valid C#.)
            if (!annotations.Any() && settings.Group == null)
            {
                annotations = new string[] { null };
            }

            _internalData = new List <InternalData>();
            var hasHeavyMods       = document.Settings.PeptideSettings.Modifications.HasHeavyModifications;
            var hasGlobalStandards = document.Settings.HasGlobalStandardArea;
            var ms1 = settings.MsLevel == AreaCVMsLevel.precursors;

            // Avoid using not-MS1 with a document that is only MS1
            if (!ms1 && document.MoleculeTransitions.All(t => t.IsMs1))
            {
                ms1 = true;
            }

            // Optional filters: null means "do not apply"
            double? qvalueCutoff = null;

            if (ShouldUseQValues(document))
            {
                qvalueCutoff = _settings.QValueCutoff;
            }

            int? minDetections = null;

            if (_settings.MinimumDetections != -1)
            {
                minDetections = _settings.MinimumDetections;
            }

            MedianInfo medianInfo = null;
            int?       ratioIndex = null;

            if (settings.NormalizeOption.IsRatioToLabel)
            {
                // Position of the chosen label type among the internal standard types; used to index the ratios below
                var isotopeLabelTypeName = (settings.NormalizeOption.NormalizationMethod as NormalizationMethod.RatioToLabel)
                                           ?.IsotopeLabelTypeName;
                ratioIndex =
                    document.Settings.PeptideSettings.Modifications.RatioInternalStandardTypes.IndexOf(type =>
                                                                                                       type.Name == isotopeLabelTypeName);
            }
            if (_settings.NormalizeOption.Is(NormalizationMethod.EQUALIZE_MEDIANS))
            {
                medianInfo = CalculateMedianAreas(document);
            }
            NormalizationData normalizationData = null; // created on first use by the quantifier callback below

            foreach (var peptideGroup in document.MoleculeGroups)
            {
                foreach (var peptide in peptideGroup.Molecules)
                {
                    if (progressMonitor != null)
                    {
                        progressMonitor.ProcessMolecule(peptide);
                    }

                    // Keep decoys only when the points type is decoys, and non-decoys only otherwise
                    if (_settings.PointsType == PointsTypePeakArea.decoys != peptide.IsDecoy)
                    {
                        continue;
                    }

                    CalibrationCurveFitter calibrationCurveFitter = null;
                    CalibrationCurve       calibrationCurve       = null;
                    IEnumerable <TransitionGroupDocNode> transitionGroups;
                    if (_settings.NormalizeOption == NormalizeOption.CALIBRATED ||
                        _settings.NormalizeOption == NormalizeOption.DEFAULT)
                    {
                        // These options compute one value per molecule through the calibration curve fitter,
                        // so only the first transition group is iterated
                        if (!peptide.TransitionGroups.Any())
                        {
                            continue;
                        }
                        var peptideQuantifier = PeptideQuantifier.GetPeptideQuantifier(() =>
                        {
                            return(normalizationData = normalizationData ?? NormalizationData.GetNormalizationData(document, false, null));
                        }, document.Settings, peptideGroup, peptide);
                        calibrationCurveFitter = new CalibrationCurveFitter(peptideQuantifier, document.Settings);
                        transitionGroups       = new[] { peptide.TransitionGroups.First() };
                        if (_settings.NormalizeOption == NormalizeOption.CALIBRATED)
                        {
                            calibrationCurve = calibrationCurveFitter.GetCalibrationCurve();
                            if (calibrationCurve == null)
                            {
                                continue;
                            }
                        }
                    }
                    else
                    {
                        transitionGroups = peptide.TransitionGroups;
                    }
                    foreach (var transitionGroupDocNode in transitionGroups)
                    {
                        foreach (var a in annotations)
                        {
                            areas.Clear();

                            // Skip annotations other than the selected one, unless a group is set with no
                            // specific annotation selected (in which case every annotation is processed)
                            if (!Equals(a, _settings.Annotation) && (_settings.Group == null || _settings.Annotation != null))
                            {
                                continue;
                            }

                            foreach (var replicateIndex in AnnotationHelper.GetReplicateIndices(document,
                                                                                                _settings.Group, a))
                            {
                                if (progressMonitor != null && progressMonitor.IsCanceled())
                                {
                                    throw new OperationCanceledException();
                                }

                                token.ThrowIfCancellationRequested();
                                var groupChromInfo = transitionGroupDocNode.GetSafeChromInfo(replicateIndex)
                                                     .FirstOrDefault(c => c.OptimizationStep == 0);
                                if (groupChromInfo == null)
                                {
                                    continue;
                                }

                                // When a q value cutoff applies, a missing q value is treated as failing it
                                if (qvalueCutoff.HasValue)
                                {
                                    if (!(groupChromInfo.QValue.HasValue &&
                                          groupChromInfo.QValue.Value < qvalueCutoff.Value))
                                    {
                                        continue;
                                    }
                                }

                                double sumArea, normalizedArea;
                                if (calibrationCurveFitter != null)
                                {
                                    // Calibrated: calculated concentration. Default: normalized peak area.
                                    double? value;
                                    if (calibrationCurve != null)
                                    {
                                        value = calibrationCurveFitter.GetCalculatedConcentration(calibrationCurve,
                                                                                                  replicateIndex);
                                    }
                                    else
                                    {
                                        value = calibrationCurveFitter.GetNormalizedPeakArea(
                                            new CalibrationPoint(replicateIndex, null));
                                    }
                                    if (!value.HasValue)
                                    {
                                        continue;
                                    }

                                    sumArea        = value.Value;
                                    normalizedArea = value.Value;
                                }
                                else
                                {
                                    if (!groupChromInfo.Area.HasValue)
                                    {
                                        continue;
                                    }
                                    var index = replicateIndex; // local copy for use inside the lambdas below
                                    // Sum the areas of the transitions that match the MS level, are quantitative,
                                    // have chromatogram info for this replicate, and pass the best / all / top-N choice
                                    sumArea = transitionGroupDocNode.Transitions.Where(t =>
                                    {
                                        if (ms1 != t.IsMs1 || !t.ExplicitQuantitative)
                                        {
                                            return(false);
                                        }

                                        var chromInfo = t.GetSafeChromInfo(index)
                                                        .FirstOrDefault(c => c.OptimizationStep == 0);
                                        if (chromInfo == null)
                                        {
                                            return(false);
                                        }
                                        if (_settings.Transitions == AreaCVTransitions.best)
                                        {
                                            return(chromInfo.RankByLevel == 1);
                                        }
                                        if (_settings.Transitions == AreaCVTransitions.all)
                                        {
                                            return(true);
                                        }

                                        return(chromInfo.RankByLevel <= _settings.CountTransitions);
                                        // ReSharper disable once PossibleNullReferenceException
                                    }).Sum(t => (double)t.GetSafeChromInfo(index)
                                           .FirstOrDefault(c => c.OptimizationStep == 0).Area);

                                    normalizedArea = sumArea;
                                    if (_settings.NormalizeOption.Is(NormalizationMethod.EQUALIZE_MEDIANS))
                                    {
                                        normalizedArea /= medianInfo.Medians[replicateIndex] / medianInfo.MedianMedian;
                                    }
                                    else if (_settings.NormalizeOption.Is(NormalizationMethod.GLOBAL_STANDARDS) &&
                                             hasGlobalStandards)
                                    {
                                        normalizedArea =
                                            NormalizeToGlobalStandard(document, transitionGroupDocNode, replicateIndex,
                                                                      sumArea);
                                    }
                                    else if (_settings.NormalizeOption.Is(NormalizationMethod.TIC))
                                    {
                                        var denominator = document.Settings.GetTicNormalizationDenominator(
                                            replicateIndex, groupChromInfo.FileId);
                                        if (!denominator.HasValue)
                                        {
                                            continue;
                                        }

                                        normalizedArea /= denominator.Value;
                                    }
                                    else if (hasHeavyMods &&
                                             _settings.NormalizeOption.NormalizationMethod is NormalizationMethod
                                             .RatioToLabel)
                                    {
                                        var ci = transitionGroupDocNode.GetSafeChromInfo(replicateIndex)
                                                 .FirstOrDefault(c => c.OptimizationStep == 0);
                                        RatioValue ratioValue = null;
                                        if (ratioIndex.HasValue && ratioIndex.Value >= 0 &&
                                            ratioIndex.Value < ci.Ratios.Count)
                                        {
                                            ratioValue = ci.Ratios[ratioIndex.Value];
                                        }

                                        // No ratio available for the chosen label type in this replicate
                                        if (ratioValue == null)
                                        {
                                            continue;
                                        }

                                        normalizedArea = ratioValue.Ratio;
                                    }
                                }
                                areas.Add(new AreaInfo(sumArea, normalizedArea));
                            }

                            // Too few replicates passed the q value cutoff
                            if (qvalueCutoff.HasValue && minDetections.HasValue && areas.Count < minDetections.Value)
                            {
                                continue;
                            }

                            _settings.AddToInternalData(_internalData, areas, peptideGroup, peptide, transitionGroupDocNode, a);
                        }
                    }
                }
            }

            // Group equal InternalData entries into one CVData each, ordered by CV
            Data = ImmutableList <CVData> .ValueOf(_internalData.GroupBy(i => i, (key, grouped) =>
            {
                var groupedArray = grouped.ToArray();
                return(new CVData(
                           groupedArray.Select(idata => new PeptideAnnotationPair(idata.PeptideGroup, idata.Peptide, idata.TransitionGroup, idata.Annotation, idata.CV)),
                           key.CVBucketed, key.Area, groupedArray.Length));
            }).OrderBy(d => d.CV));
        }
        /// <summary>
        /// Runs the group comparison over every molecule group in the document and collects the fold change results.
        /// </summary>
        /// <param name="groupComparer">performs the calculation; its comparison definition decides whether
        /// results are computed once per protein or once per molecule</param>
        /// <param name="document">document whose molecule groups are processed</param>
        /// <param name="cancellationToken">optional token; only consulted when <paramref name="_lock"/> is non-null</param>
        /// <param name="_lock">optional object that is locked while the token is checked, once per molecule group</param>
        /// <param name="progressMonitor">optional; checked for cancellation and notified before each calculation</param>
        /// <returns>all results, in document order</returns>
        /// <exception cref="OperationCanceledException">cancellation was requested through the token or the progress monitor</exception>
        public static List <GroupComparisonResult> ComputeResults(GroupComparer groupComparer, SrmDocument document,
                                                                  CancellationToken?cancellationToken, object _lock, SrmSettingsChangeMonitor progressMonitor = null)
        {
            var collected = new List<GroupComparisonResult>();

            // Materialize the groups up front, then walk them in order
            foreach (var moleculeGroup in document.MoleculeGroups.ToArray())
            {
                if (_lock != null)
                {
                    lock (_lock)
                    {
                        cancellationToken?.ThrowIfCancellationRequested();
                    }
                }

                // Per-protein comparisons make a single calculation with a null molecule
                IEnumerable<PeptideDocNode> molecules = moleculeGroup.Molecules;
                if (groupComparer.ComparisonDef.PerProtein)
                {
                    molecules = new PeptideDocNode[] { null };
                }

                foreach (var molecule in molecules)
                {
                    if (progressMonitor != null)
                    {
                        if (progressMonitor.IsCanceled())
                        {
                            throw new OperationCanceledException();
                        }
                        progressMonitor.ProcessMolecule(molecule);
                    }
                    collected.AddRange(groupComparer.CalculateFoldChanges(moleculeGroup, molecule));
                }
            }

            return collected;
        }
Exemple #8
0
        /// <summary>
        /// Finishes a batch modification: applies the deferred settings changes to the document behind a
        /// wait dialog, records an audit log entry describing the batch, clears the batch state and raises
        /// the document changed handler.
        /// </summary>
        /// <param name="description">description passed to <c>SkylineWindow.ModifyDocument</c></param>
        /// <param name="batchModifyInfo">the kind of batch action (paste, clear, fill down) plus extra audit log information</param>
        /// <exception cref="InvalidOperationException">no batch modification is in progress</exception>
        public void CommitBatchModifyDocument(string description, DataGridViewPasteHandler.BatchModifyInfo batchModifyInfo)
        {
            if (_batchChangesOriginalDocument == null)
            {
                throw new InvalidOperationException();
            }
            string waitMessage = Resources.DataGridViewPasteHandler_EndDeferSettingsChangesOnDocument_Updating_settings;

            SkylineWindow.ModifyDocument(description, currentDoc =>
            {
                VerifyDocumentCurrent(_batchChangesOriginalDocument, currentDoc);
                using (var waitDlg = new LongWaitDlg { Message = waitMessage })
                {
                    SrmDocument updatedDoc = null;
                    waitDlg.PerformWork(SkylineWindow, 1000, monitor =>
                    {
                        var settingsMonitor = new SrmSettingsChangeMonitor(monitor, waitMessage);
                        updatedDoc = _document.EndDeferSettingsChanges(_batchChangesOriginalDocument.Settings, settingsMonitor);
                    });
                    return updatedDoc;
                }
            }, docPair =>
            {
                // Defaults describe a "set value" edit: caption, element and the new value
                var perCellType = MessageType.set_to_in_document_grid;
                Func<EditDescription, object[]> perCellArgs = edit => new object[]
                {
                    edit.ColumnCaption.GetCaption(DataSchemaLocalizer), edit.ElementRefName,
                    CellValueToString(edit.Value)
                };

                MessageType singleType, multiType;
                switch (batchModifyInfo.BatchModifyAction)
                {
                case DataGridViewPasteHandler.BatchModifyAction.Paste:
                    singleType = MessageType.pasted_document_grid_single;
                    multiType  = MessageType.pasted_document_grid;
                    break;

                case DataGridViewPasteHandler.BatchModifyAction.Clear:
                    singleType = MessageType.cleared_document_grid_single;
                    multiType  = MessageType.cleared_document_grid;
                    // Cleared cells have no value to report
                    perCellType = MessageType.cleared_cell_in_document_grid;
                    perCellArgs = edit => new object[] { edit.ColumnCaption.GetCaption(DataSchemaLocalizer), edit.ElementRefName };
                    break;

                case DataGridViewPasteHandler.BatchModifyAction.FillDown:
                    singleType = MessageType.fill_down_document_grid_single;
                    multiType  = MessageType.fill_down_document_grid;
                    break;

                default:
                    return null; // Unknown action: no audit log entry
                }

                // Summary entry counting the edits, then the creator's entry merged in, then one detail line per edit
                var summary = AuditLogEntry.CreateCountChangeEntry(docPair.OldDoc, singleType, multiType,
                                                                   _batchEditDescriptions,
                                                                   edit => MessageArgs.Create(edit.ColumnCaption.GetCaption(DataSchemaLocalizer)),
                                                                   null);
                var merged = summary.ChangeExtraInfo(batchModifyInfo.ExtraInfo + Environment.NewLine)
                                    .Merge(batchModifyInfo.EntryCreator.Create(docPair));

                var details = _batchEditDescriptions
                    .Select(edit => new MessageInfo(perCellType, perCellArgs(edit)))
                    .ToList();
                return merged.AppendAllInfo(details);
            });

            // The batch is finished: drop its state and notify listeners
            _batchChangesOriginalDocument = null;
            _batchEditDescriptions        = null;
            DocumentChangedEventHandler(_documentContainer, new DocumentChangedEventArgs(_document));
        }