/// <summary>
/// Reduces the detailed appearance information from <see cref="GetPeptidesAppearances"/>
/// to plain counts (proteins, genes, species) for each peptide of interest.
/// </summary>
/// <param name="peptidesOfInterest">peptides to look up; passed through unchanged to <see cref="GetPeptidesAppearances"/></param>
/// <param name="enzyme">enzyme passed through to the digestion</param>
/// <param name="settings">peptide settings passed through to the digestion</param>
/// <param name="progressMonitor">progress / cancellation monitor passed through to the digestion</param>
/// <returns>counts keyed by a Target built from each peptide sequence, or null when
/// <see cref="GetPeptidesAppearances"/> returned null (treated by callers as cancelled)</returns>
public Dictionary<Target, DigestionPeptideStats> GetPeptidesAppearanceCounts(Dictionary<Target, bool> peptidesOfInterest, Enzyme enzyme, PeptideSettings settings, SrmSettingsChangeMonitor progressMonitor)
{
    var detailed = GetPeptidesAppearances(peptidesOfInterest, enzyme, settings, progressMonitor);
    if (detailed == null)
    {
        return null; // Cancelled
    }

    var counts = new Dictionary<Target, DigestionPeptideStats>();
    foreach (var entry in detailed)
    {
        var stats = entry.Value;
        counts.Add(new Target(entry.Key),
            new DigestionPeptideStats(stats.Proteins.Count, stats.Genes.Count, stats.Species.Count));
    }
    return counts;
}
/// <summary>
/// Marks each peptide in <paramref name="sequences"/> as unique (true) or not (false)
/// according to the uniqueness constraint in the peptide filter settings.
/// </summary>
/// <param name="sequences">peptides to test; every value must be true on entry. The dictionary is
/// updated in place and is also the return value on the normal path.</param>
/// <param name="peptideSettings">supplies the uniqueness constraint and the background proteome</param>
/// <param name="progressMonitor">progress / cancellation monitor handed to the uniqueness cache</param>
/// <returns><paramref name="sequences"/> itself (unchanged when no filtering applies), or a new empty
/// dictionary when the uniqueness lookup returned null (cancelled)</returns>
/// <exception cref="ArgumentOutOfRangeException">the constraint is not protein, gene or species
/// and a peptide with uniqueness information was encountered</exception>
public Dictionary<Target, bool> PeptidesUniquenessFilter(Dictionary<Target, bool> sequences, PeptideSettings peptideSettings, SrmSettingsChangeMonitor progressMonitor)
{
    var constraint = peptideSettings.Filter.PeptideUniqueness;
    Assume.IsTrue(sequences.All(s => s.Value)); // Caller should seed this with all true

    bool nothingToFilter = constraint == PeptideFilter.PeptideUniquenessConstraint.none ||
                           peptideSettings.BackgroundProteome == null ||
                           peptideSettings.BackgroundProteome.IsNone;
    if (nothingToFilter)
    {
        return sequences; // No filtering
    }

    lock (_cache)
    {
        var statsByPeptide = _cache.GetUniquenessDict(peptideSettings, sequences, progressMonitor);
        if (statsByPeptide == null)
        {
            return new Dictionary<Target, bool>(); // Cancelled
        }

        // Snapshot the keys because the values are rewritten while walking them.
        var targets = sequences.Keys.ToArray();
        foreach (var target in targets)
        {
            DigestionPeptideStats stats;
            if (!statsByPeptide.TryGetValue(target, out stats))
            {
                continue; // No information for this peptide: leave its flag as is
            }

            bool unique;
            switch (constraint)
            {
                case PeptideFilter.PeptideUniquenessConstraint.protein:
                    unique = stats.Proteins <= 1;
                    break;
                case PeptideFilter.PeptideUniquenessConstraint.gene:
                    unique = stats.Genes <= 1;
                    break;
                case PeptideFilter.PeptideUniquenessConstraint.species:
                    unique = stats.Species <= 1;
                    break;
                default:
                    throw new ArgumentOutOfRangeException(nameof(peptideSettings));
            }
            sequences[target] = unique;
        }
    }

    return sequences;
}
/// <summary>
/// Get, create, or update the cached dictionary that gives uniqueness information for peptides of interest.
/// </summary>
/// <param name="peptideSettings">supplies the enzyme, and is passed on in case a digestion is needed</param>
/// <param name="peptidesOfInterest">peptides that must be covered by the result. NOTE: when the cache has to be
/// rebuilt, every peptide already in the cache is added to this dictionary (with value true) so that it is
/// looked up again too.</param>
/// <param name="progressMonitor">cancellation checker</param>
/// <returns>the cached dictionary of per-peptide appearance counts, or null if cancelled</returns>
public Dictionary<Target, DigestionPeptideStats> GetUniquenessDict(PeptideSettings peptideSettings, Dictionary<Target, bool> peptidesOfInterest, SrmSettingsChangeMonitor progressMonitor)
{
    var enzyme = peptideSettings.Enzyme;

    // The cache is usable as is when it was built with the same enzyme and already covers every requested peptide.
    bool enzymeChanged = enzyme.Name != _enzymeNameForPeptideUniquenessDictDigest;
    if (!enzymeChanged && peptidesOfInterest.Keys.All(pep => _peptideUniquenessDict.ContainsKey(pep)))
    {
        return _peptideUniquenessDict; // No change needed
    }

    if (!_parent.UpdateProgressAndCheckForCancellation(progressMonitor, 0))
    {
        return null; // Cancelled
    }

    // Any peptides we were interested in before (ie in the current dict if any) are likely still
    // interesting in future calls, even if not of immediate interest
    foreach (var knownPeptide in _peptideUniquenessDict.Keys)
    {
        if (!peptidesOfInterest.ContainsKey(knownPeptide))
        {
            peptidesOfInterest.Add(knownPeptide, true);
        }
    }

    var freshCounts = _parent.GetPeptidesAppearanceCounts(peptidesOfInterest, enzyme, peptideSettings, progressMonitor);
    if (freshCounts == null)
    {
        return null; // Cancelled
    }

    // Always install a new dictionary instance: empty when the enzyme changed, otherwise a copy of the old contents.
    _peptideUniquenessDict = Equals(enzyme.Name, _enzymeNameForPeptideUniquenessDictDigest)
        ? _peptideUniquenessDict.ToDictionary(s => s.Key, s => s.Value)
        : new Dictionary<Target, DigestionPeptideStats>();

    // Add new entries and overwrite existing ones.
    foreach (var fresh in freshCounts)
    {
        _peptideUniquenessDict[fresh.Key] = fresh.Value;
    }
    _enzymeNameForPeptideUniquenessDictDigest = enzyme.Name;

    if (!_parent.UpdateProgressAndCheckForCancellation(progressMonitor, 100))
    {
        return null; // Cancelled
    }
    return _peptideUniquenessDict;
}
// N.B. leaving this level of indirection in place as it will be useful in speeding up the Unique Peptides dialog
/// <summary>
/// Examine the background proteome for uniqueness information about the peptides of interest
/// </summary>
/// <param name="peptidesOfInterest">this is a dict instead of a list only because upstream callers have already prepared this, which can be large and expensive to construct</param>
/// <param name="enzyme">how we digest</param>
/// <param name="settings">details like max missed cleavages</param>
/// <param name="progressMonitor">cancellation checker</param>
/// <returns>for each peptide sequence of interest, the proteins, genes and species it was found in.
/// Returns an empty dictionary when there are no peptides of interest, and null when there is no
/// database path or when the progress check reports cancellation.</returns>
public Dictionary<string, DigestionPeptideStatsDetailed> GetPeptidesAppearances(
    Dictionary<Target, bool> peptidesOfInterest, Enzyme enzyme, PeptideSettings settings, SrmSettingsChangeMonitor progressMonitor)
{
    if (string.IsNullOrEmpty(DatabasePath))
    {
        return null;
    }
    var results = peptidesOfInterest.ToDictionary(pep => pep.Key.Sequence, pep => new DigestionPeptideStatsDetailed());
    if (results.Count == 0)
    {
        return results;
    }
    var protease = new ProteaseImpl(enzyme);
    var maxPeptideLength = peptidesOfInterest.Max(p => p.Key.Sequence.Length); // No interest in any peptide longer than the longest one of interest
    const int DIGEST_CHUNKSIZE = 1000; // Check for cancel every N proteins
    var proteinCount = 0;
    using (var proteomeDb = OpenProteomeDb())
    {
        var goal = Math.Max(proteomeDb.GetProteinCount(), 1);
        var batchCount = 0;
        var minimalProteinInfos = new ProteomeDb.MinimalProteinInfo[DIGEST_CHUNKSIZE];
        foreach (var minimalProteinInfo in proteomeDb.GetMinimalProteinInfo()) // Get list of sequence, proteinID, gene, species from the protdb file
        {
            minimalProteinInfos[batchCount++] = minimalProteinInfo;
            var pct = Math.Max(1, 100 * proteinCount++ / goal); // Show at least a little progress at start to give user hope

            // Report progress and check for cancellation once per batch, on the first protein queued.
            // (This used to test batchCount == 0, which can never hold right after the increment above,
            // so progress was never reported and cancellation was never noticed in this loop.)
            if (batchCount == 1 && !UpdateProgressAndCheckForCancellation(progressMonitor, pct))
            {
                return null;
            }

            // A null entry is treated as a flush marker: it is dropped from the batch (--batchCount) and
            // whatever was queued before it is digested. A full batch is digested as well.
            // NOTE(review): presumably GetMinimalProteinInfo ends with a null entry so that the final
            // partial batch gets processed - confirm against its implementation.
            if (((minimalProteinInfo == null) && --batchCount > 0) || batchCount == DIGEST_CHUNKSIZE)
            {
                ParallelEx.For(0, batchCount, ii =>
                {
                    var protein = minimalProteinInfos[ii];
                    foreach (var peptide in protease.DigestSequence(protein.Sequence, settings.DigestSettings.MaxMissedCleavages, maxPeptideLength))
                    {
                        DigestionPeptideStatsDetailed appearances;
                        if (results.TryGetValue(peptide.Sequence, out appearances))
                        {
                            // Batches are digested in parallel, so guard each peptide's stats while updating them
                            lock (appearances)
                            {
                                appearances.Proteins.Add(protein.Id);
                                appearances.Genes.Add(protein.Gene); // HashSet eliminates duplicates
                                appearances.Species.Add(protein.Species); // HashSet eliminates duplicates
                            }
                        }
                    }
                });
                batchCount = 0;
            }
        }
    }
    return results;
}
/// <summary>
/// Lazily enumerates the peptide nodes available for this group under the given settings.
/// </summary>
/// <param name="settings">document settings used to create or filter peptide nodes</param>
/// <param name="useFilter">true to apply the settings as a peptide filter, false to use <c>PeptideFilter.UNFILTERED</c></param>
/// <param name="monitor">optional monitor; when it reports cancellation the enumeration throws</param>
/// <returns>a deferred sequence of peptide nodes</returns>
/// <exception cref="OperationCanceledException">thrown during enumeration when <paramref name="monitor"/> reports cancellation</exception>
public IEnumerable<PeptideDocNode> GetPeptideNodes(SrmSettings settings, bool useFilter, SrmSettingsChangeMonitor monitor = null)
{
    // FASTA sequences can generate a comprehensive list of available peptides.
    var fasta = Id as FastaSequence;
    if (fasta != null)
    {
        foreach (PeptideDocNode created in fasta.CreatePeptideDocNodes(settings, useFilter, null))
        {
            if (monitor != null && monitor.IsCanceled())
            {
                throw new OperationCanceledException();
            }
            yield return created;
        }
        yield break;
    }

    // Peptide lists without variable modifications just return their existing children.
    if (!settings.PeptideSettings.Modifications.HasVariableModifications)
    {
        foreach (PeptideDocNode child in Children)
        {
            if (monitor != null && monitor.IsCanceled())
            {
                throw new OperationCanceledException();
            }
            if (child.HasVariableMods)
            {
                continue;
            }
            yield return child;
        }
        yield break;
    }

    // If there are variable modifications, fill out the available list.
    var alreadyExpanded = new HashSet<Peptide>();
    IPeptideFilter filter;
    if (useFilter)
    {
        filter = settings;
    }
    else
    {
        filter = PeptideFilter.UNFILTERED;
    }

    foreach (PeptideDocNode child in Children)
    {
        if (monitor != null && monitor.IsCanceled())
        {
            throw new OperationCanceledException();
        }

        // Custom molecules are passed through (modifications mean nothing to custom ions
        // - TODO(bspratt) but static isotope labels do?), as are nodes whose modifications
        // are explicit and not variable.
        if (child.Peptide.IsCustomMolecule || (child.HasExplicitMods && !child.HasVariableMods))
        {
            yield return child;
            continue;
        }

        // Each underlying peptide is expanded only once.
        if (alreadyExpanded.Contains(child.Peptide))
        {
            continue;
        }

        // The peptide will be returned as the Id of the unmodified instance of this
        // peptide. If the peptide DocNode is explicitly modified this will cause
        // two nodes in the tree to have the same Id. So, use a copy instead.
        var source = child.Peptide;
        if (child.HasExplicitMods)
        {
            source = (Peptide)source.Copy();
        }

        int produced = 0;
        foreach (PeptideDocNode generated in source.CreateDocNodes(settings, filter))
        {
            yield return generated;
            produced++;
        }

        // Make sure the peptide is not removed due to filtering
        if (produced == 0)
        {
            yield return child;
        }
        alreadyExpanded.Add(child.Peptide);
    }
}
/// <summary>
/// Collects peak areas (raw and normalized) per transition group and annotation across replicates,
/// hands them to the settings object to accumulate internal data, and finally groups that data
/// into <c>Data</c>, ordered by CV.
/// </summary>
/// <param name="document">document to analyze; when null or without results the constructor returns
/// early after storing the settings, without assigning <c>_internalData</c> or <c>Data</c></param>
/// <param name="settings">refinement settings (normalization, MS level, transitions, cutoffs, grouping)</param>
/// <param name="token">checked for cancellation once per replicate</param>
/// <param name="progressMonitor">optional monitor: told about each molecule processed, and polled for
/// cancellation once per replicate</param>
/// <exception cref="OperationCanceledException">thrown when <paramref name="progressMonitor"/> reports
/// cancellation or <paramref name="token"/> is cancelled</exception>
public AreaCVRefinementData(SrmDocument document, AreaCVRefinementSettings settings, CancellationToken token, SrmSettingsChangeMonitor progressMonitor = null)
{
    _settings = settings;
    if (document == null || !document.Settings.HasResults)
    {
        return;
    }

    var replicates = document.MeasuredResults.Chromatograms.Count;
    var areas = new List<AreaInfo>(replicates); // Reused (cleared) for every transition group / annotation pair
    var annotations = AnnotationHelper.GetPossibleAnnotations(document, settings.Group).ToArray();
    if (!annotations.Any() && settings.Group == null)
    {
        // No grouping and no annotations: iterate once with a null annotation
        annotations = new string[] { null };
    }

    _internalData = new List<InternalData>();
    var hasHeavyMods = document.Settings.PeptideSettings.Modifications.HasHeavyModifications;
    var hasGlobalStandards = document.Settings.HasGlobalStandardArea;

    var ms1 = settings.MsLevel == AreaCVMsLevel.precursors;
    // Avoid using not-MS1 with a document that is only MS1
    if (!ms1 && document.MoleculeTransitions.All(t => t.IsMs1))
    {
        ms1 = true;
    }

    double? qvalueCutoff = null;
    if (ShouldUseQValues(document))
    {
        qvalueCutoff = _settings.QValueCutoff;
    }

    // -1 means no minimum number of detections is configured
    int? minDetections = null;
    if (_settings.MinimumDetections != -1)
    {
        minDetections = _settings.MinimumDetections;
    }

    MedianInfo medianInfo = null;
    int? ratioIndex = null;
    if (settings.NormalizeOption.IsRatioToLabel)
    {
        var isotopeLabelTypeName = (settings.NormalizeOption.NormalizationMethod as NormalizationMethod.RatioToLabel)
            ?.IsotopeLabelTypeName;
        ratioIndex = document.Settings.PeptideSettings.Modifications.RatioInternalStandardTypes.IndexOf(type => type.Name == isotopeLabelTypeName);
    }

    if (_settings.NormalizeOption.Is(NormalizationMethod.EQUALIZE_MEDIANS))
    {
        medianInfo = CalculateMedianAreas(document);
    }

    // Created on first request by the quantifier callback below, then shared by all molecules
    NormalizationData normalizationData = null;

    foreach (var peptideGroup in document.MoleculeGroups)
    {
        foreach (var peptide in peptideGroup.Molecules)
        {
            if (progressMonitor != null)
            {
                progressMonitor.ProcessMolecule(peptide);
            }

            // Parsed as (PointsType == decoys) != IsDecoy: keep decoys only when decoy points are
            // requested, and non-decoys only otherwise
            if (_settings.PointsType == PointsTypePeakArea.decoys != peptide.IsDecoy)
            {
                continue;
            }

            CalibrationCurveFitter calibrationCurveFitter = null;
            CalibrationCurve calibrationCurve = null;
            IEnumerable<TransitionGroupDocNode> transitionGroups;
            if (_settings.NormalizeOption == NormalizeOption.CALIBRATED || _settings.NormalizeOption == NormalizeOption.DEFAULT)
            {
                if (!peptide.TransitionGroups.Any())
                {
                    continue;
                }

                var peptideQuantifier = PeptideQuantifier.GetPeptideQuantifier(() =>
                {
                    return normalizationData = normalizationData ??
                        NormalizationData.GetNormalizationData(document, false, null);
                }, document.Settings, peptideGroup, peptide);
                calibrationCurveFitter = new CalibrationCurveFitter(peptideQuantifier, document.Settings);
                // The value comes from the fitter for the whole molecule, so only the first
                // transition group is visited
                transitionGroups = new[] { peptide.TransitionGroups.First() };
                if (_settings.NormalizeOption == NormalizeOption.CALIBRATED)
                {
                    calibrationCurve = calibrationCurveFitter.GetCalibrationCurve();
                    if (calibrationCurve == null)
                    {
                        continue;
                    }
                }
            }
            else
            {
                transitionGroups = peptide.TransitionGroups;
            }

            foreach (var transitionGroupDocNode in transitionGroups)
            {
                foreach (var a in annotations)
                {
                    areas.Clear();

                    // Skip annotations other than the configured one, unless a group is set and no
                    // specific annotation is configured (then every annotation is processed)
                    if (!Equals(a, _settings.Annotation) && (_settings.Group == null || _settings.Annotation != null))
                    {
                        continue;
                    }

                    foreach (var replicateIndex in AnnotationHelper.GetReplicateIndices(document, _settings.Group, a))
                    {
                        if (progressMonitor != null && progressMonitor.IsCanceled())
                        {
                            throw new OperationCanceledException();
                        }
                        token.ThrowIfCancellationRequested();

                        var groupChromInfo = transitionGroupDocNode.GetSafeChromInfo(replicateIndex)
                            .FirstOrDefault(c => c.OptimizationStep == 0);
                        if (groupChromInfo == null)
                        {
                            continue;
                        }

                        if (qvalueCutoff.HasValue)
                        {
                            // Require a q value strictly below the cutoff
                            if (!(groupChromInfo.QValue.HasValue && groupChromInfo.QValue.Value < qvalueCutoff.Value))
                            {
                                continue;
                            }
                        }

                        double sumArea, normalizedArea;
                        if (calibrationCurveFitter != null)
                        {
                            // Calibrated / default normalization: the fitter supplies the value, which
                            // is used for both the raw and the normalized area
                            double? value;
                            if (calibrationCurve != null)
                            {
                                value = calibrationCurveFitter.GetCalculatedConcentration(calibrationCurve, replicateIndex);
                            }
                            else
                            {
                                value = calibrationCurveFitter.GetNormalizedPeakArea(
                                    new CalibrationPoint(replicateIndex, null));
                            }

                            if (!value.HasValue)
                            {
                                continue;
                            }

                            sumArea = value.Value;
                            normalizedArea = value.Value;
                        }
                        else
                        {
                            if (!groupChromInfo.Area.HasValue)
                            {
                                continue;
                            }

                            // Sum the areas of the transitions selected by MS level, quantitative flag
                            // and the best / all / top-N transition setting
                            var index = replicateIndex;
                            sumArea = transitionGroupDocNode.Transitions.Where(t =>
                            {
                                if (ms1 != t.IsMs1 || !t.ExplicitQuantitative)
                                {
                                    return false;
                                }

                                var chromInfo = t.GetSafeChromInfo(index)
                                    .FirstOrDefault(c => c.OptimizationStep == 0);
                                if (chromInfo == null)
                                {
                                    return false;
                                }
                                if (_settings.Transitions == AreaCVTransitions.best)
                                {
                                    return chromInfo.RankByLevel == 1;
                                }
                                if (_settings.Transitions == AreaCVTransitions.all)
                                {
                                    return true;
                                }

                                return chromInfo.RankByLevel <= _settings.CountTransitions;
                                // ReSharper disable once PossibleNullReferenceException
                            }).Sum(t => (double)t.GetSafeChromInfo(index)
                                .FirstOrDefault(c => c.OptimizationStep == 0).Area);

                            normalizedArea = sumArea;
                            if (_settings.NormalizeOption.Is(NormalizationMethod.EQUALIZE_MEDIANS))
                            {
                                normalizedArea /= medianInfo.Medians[replicateIndex] / medianInfo.MedianMedian;
                            }
                            else if (_settings.NormalizeOption.Is(NormalizationMethod.GLOBAL_STANDARDS) && hasGlobalStandards)
                            {
                                normalizedArea = NormalizeToGlobalStandard(document, transitionGroupDocNode, replicateIndex, sumArea);
                            }
                            else if (_settings.NormalizeOption.Is(NormalizationMethod.TIC))
                            {
                                var denominator = document.Settings.GetTicNormalizationDenominator(
                                    replicateIndex, groupChromInfo.FileId);
                                if (!denominator.HasValue)
                                {
                                    continue;
                                }

                                normalizedArea /= denominator.Value;
                            }
                            else if (hasHeavyMods && _settings.NormalizeOption.NormalizationMethod is NormalizationMethod.RatioToLabel)
                            {
                                // Same lookup as groupChromInfo above, which was already checked for null
                                var ci = transitionGroupDocNode.GetSafeChromInfo(replicateIndex)
                                    .FirstOrDefault(c => c.OptimizationStep == 0);
                                RatioValue ratioValue = null;
                                if (ratioIndex.HasValue && ratioIndex.Value >= 0 && ratioIndex.Value < ci.Ratios.Count)
                                {
                                    ratioValue = ci.Ratios[ratioIndex.Value];
                                }

                                if (ratioValue == null)
                                {
                                    continue;
                                }

                                normalizedArea = ratioValue.Ratio;
                            }
                        }

                        areas.Add(new AreaInfo(sumArea, normalizedArea));
                    }

                    // The minimum-detections rule is only applied when q values are in use
                    if (qvalueCutoff.HasValue && minDetections.HasValue && areas.Count < minDetections.Value)
                    {
                        continue;
                    }

                    _settings.AddToInternalData(_internalData, areas, peptideGroup, peptide, transitionGroupDocNode, a);
                }
            }
        }
    }

    // Group equal internal data items into one CVData each, ordered by CV
    Data = ImmutableList<CVData>.ValueOf(_internalData.GroupBy(i => i,
        (key, grouped) =>
        {
            var groupedArray = grouped.ToArray();
            return new CVData(
                groupedArray.Select(idata => new PeptideAnnotationPair(idata.PeptideGroup, idata.Peptide, idata.TransitionGroup, idata.Annotation, idata.CV)),
                key.CVBucketed, key.Area, groupedArray.Length);
        }).OrderBy(d => d.CV));
}
/// <summary>
/// Calculates fold changes for every molecule group in the document and returns them as one list.
/// </summary>
/// <param name="groupComparer">performs the fold change calculation; its comparison definition decides
/// whether results are computed per protein or per molecule</param>
/// <param name="document">document whose molecule groups are processed</param>
/// <param name="cancellationToken">optional token, checked once per molecule group</param>
/// <param name="_lock">optional object; when given, the token check is made while holding a lock on it</param>
/// <param name="progressMonitor">optional monitor, polled for cancellation and told about each molecule processed</param>
/// <returns>all fold change results, in molecule group order</returns>
/// <exception cref="OperationCanceledException">thrown when the token is cancelled or the monitor reports cancellation</exception>
public static List<GroupComparisonResult> ComputeResults(GroupComparer groupComparer, SrmDocument document, CancellationToken? cancellationToken, object _lock, SrmSettingsChangeMonitor progressMonitor = null)
{
    var results = new List<GroupComparisonResult>();
    var peptideGroups = document.MoleculeGroups.ToArray();
    foreach (var peptideGroup in peptideGroups)
    {
        // Honor the token whether or not a lock object was supplied. Previously the token was only
        // consulted inside the lock, so a caller passing a token with a null lock was never cancelled.
        if (_lock != null)
        {
            lock (_lock)
            {
                if (cancellationToken.HasValue)
                {
                    cancellationToken.Value.ThrowIfCancellationRequested();
                }
            }
        }
        else if (cancellationToken.HasValue)
        {
            cancellationToken.Value.ThrowIfCancellationRequested();
        }

        IEnumerable<PeptideDocNode> peptides;
        if (groupComparer.ComparisonDef.PerProtein)
        {
            // A single null molecule stands for the group as a whole
            peptides = new PeptideDocNode[] { null };
        }
        else
        {
            peptides = peptideGroup.Molecules;
        }

        foreach (var peptide in peptides)
        {
            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled())
                {
                    throw new OperationCanceledException();
                }
                progressMonitor.ProcessMolecule(peptide);
            }

            results.AddRange(groupComparer.CalculateFoldChanges(peptideGroup, peptide));
        }
    }

    return results;
}
/// <summary>
/// Commits a batch of deferred edits: asks the window to replace the current document with the result of
/// ending the deferred settings changes (computed under a wait dialog), supplies the audit log entry for
/// the change, then clears the batch state and raises the document changed handler.
/// </summary>
/// <param name="description">description passed to the window's ModifyDocument call</param>
/// <param name="batchModifyInfo">the kind of batch action (paste, clear, fill down), plus extra info and
/// an entry creator merged into the audit log entry</param>
/// <exception cref="InvalidOperationException">no batch modification is in progress</exception>
public void CommitBatchModifyDocument(string description, DataGridViewPasteHandler.BatchModifyInfo batchModifyInfo)
{
    if (null == _batchChangesOriginalDocument)
    {
        throw new InvalidOperationException();
    }

    string waitMessage = Resources.DataGridViewPasteHandler_EndDeferSettingsChangesOnDocument_Updating_settings;
    SkylineWindow.ModifyDocument(description,
        currentDocument =>
        {
            VerifyDocumentCurrent(_batchChangesOriginalDocument, currentDocument);
            using (var waitDialog = new LongWaitDlg { Message = waitMessage })
            {
                SrmDocument committedDocument = null;
                waitDialog.PerformWork(SkylineWindow, 1000, workMonitor =>
                {
                    var changeMonitor = new SrmSettingsChangeMonitor(workMonitor, waitMessage);
                    committedDocument = _document.EndDeferSettingsChanges(_batchChangesOriginalDocument.Settings, changeMonitor);
                });
                return committedDocument;
            }
        },
        docPair =>
        {
            // Defaults describe a "set to" change; the clear action overrides both below.
            MessageType countSingular, countPlural;
            var perCellType = MessageType.set_to_in_document_grid;
            Func<EditDescription, object[]> buildDetailArgs = descr => new object[]
            {
                descr.ColumnCaption.GetCaption(DataSchemaLocalizer),
                descr.ElementRefName,
                CellValueToString(descr.Value)
            };

            var action = batchModifyInfo.BatchModifyAction;
            if (action == DataGridViewPasteHandler.BatchModifyAction.Paste)
            {
                countSingular = MessageType.pasted_document_grid_single;
                countPlural = MessageType.pasted_document_grid;
            }
            else if (action == DataGridViewPasteHandler.BatchModifyAction.Clear)
            {
                countSingular = MessageType.cleared_document_grid_single;
                countPlural = MessageType.cleared_document_grid;
                perCellType = MessageType.cleared_cell_in_document_grid;
                // Cleared cells have no value to report
                buildDetailArgs = descr => new object[]
                {
                    descr.ColumnCaption.GetCaption(DataSchemaLocalizer),
                    descr.ElementRefName
                };
            }
            else if (action == DataGridViewPasteHandler.BatchModifyAction.FillDown)
            {
                countSingular = MessageType.fill_down_document_grid_single;
                countPlural = MessageType.fill_down_document_grid;
            }
            else
            {
                return null; // Unknown action: no audit log entry
            }

            var summaryEntry = AuditLogEntry.CreateCountChangeEntry(docPair.OldDoc, countSingular, countPlural,
                _batchEditDescriptions,
                descr => MessageArgs.Create(descr.ColumnCaption.GetCaption(DataSchemaLocalizer)),
                null);
            var logEntry = summaryEntry.ChangeExtraInfo(batchModifyInfo.ExtraInfo + Environment.NewLine);
            logEntry = logEntry.Merge(batchModifyInfo.EntryCreator.Create(docPair));

            var perCellInfos = _batchEditDescriptions
                .Select(descr => new MessageInfo(perCellType, buildDetailArgs(descr)))
                .ToList();
            return logEntry.AppendAllInfo(perCellInfos);
        });

    _batchChangesOriginalDocument = null;
    _batchEditDescriptions = null;
    DocumentChangedEventHandler(_documentContainer, new DocumentChangedEventArgs(_document));
}