public void AddRemoveExplicitModTest()
{
    // Round-trip check on the Study 7 document: strip every explicit modification,
    // put the saved modifications back, re-add the heavy precursor that was lost
    // along the way, and verify that the exported transition list is unchanged.
    SrmDocument docStudy7 = CreateStudy7Doc();
    string transitionList = ExportCsv(docStudy7);
    // Special test mode may add an extra doc node
    Assert.AreEqual(69, transitionList.Split('\n').Length);

    var docMods = docStudy7.Settings.PeptideSettings.Modifications;
    var staticMods = docMods.StaticModifications;
    var heavyMods = docMods.AllHeavyModifications.ToList();

    // Turn off transition auto-select before editing the modifications
    var settingsNoAutoSelect = docStudy7.Settings.ChangeTransitionFilter(filter => filter.ChangeAutoSelect(false));
    docStudy7 = docStudy7.ChangeSettings(settingsNoAutoSelect);

    // Remove all modifications, but save them for later keyed by peptide index
    var savedMods = new Dictionary<int, ExplicitMods>();
    int pepIndex = 0;
    foreach (var nodePeptide in docStudy7.Peptides)
    {
        if (nodePeptide.HasExplicitMods)
        {
            savedMods.Add(pepIndex, nodePeptide.ExplicitMods);

            IdentityPath pathPeptide = docStudy7.GetPathTo((int)SrmDocument.Level.Molecules, pepIndex);
            docStudy7 = docStudy7.ChangePeptideMods(pathPeptide, null, staticMods, heavyMods);
        }
        pepIndex++;
    }

    // Removes heavy from peptide with c-terminal P
    AssertEx.IsDocumentState(docStudy7, 6, 7, 11, 21, 63);
    docMods = docStudy7.Settings.PeptideSettings.Modifications;
    Assert.AreEqual(2, docMods.AllHeavyModifications.Count());
    Assert.AreEqual(0, docMods.AllHeavyModifications.Count(mod => mod.IsExplicit));
    Assert.AreEqual(0, docStudy7.Peptides.Count(peptide => peptide.HasExplicitMods));

    // Restore the saved explicit modifications, this time against ATOMIC_HEAVY_MODS
    heavyMods = ATOMIC_HEAVY_MODS;
    foreach (var saved in savedMods)
    {
        IdentityPath pathPeptide = docStudy7.GetPathTo((int)SrmDocument.Level.Molecules, saved.Key);
        docStudy7 = docStudy7.ChangePeptideMods(pathPeptide, saved.Value, staticMods, heavyMods);
    }
    AssertEx.IsDocumentState(docStudy7, 11, 7, 11, 21, 63);

    // Replace the heavy precursor that was removed
    // TODO: Yuck.  Would be nice to have a way to do this without duplicating
    //       so much of the logic in PeptideDocNode and PeptideTreeNode
    var pepPath = docStudy7.GetPathTo((int)SrmDocument.Level.Molecules, 10);
    var nodePep = (PeptideDocNode)docStudy7.FindNode(pepPath);
    var mods = nodePep.ExplicitMods;
    var nodeGroupLight = (TransitionGroupDocNode)nodePep.Children[0];
    var settings = docStudy7.Settings;
    foreach (var tranGroup in nodePep.GetTransitionGroups(settings, mods, false))
    {
        // Only the first non-light group with the light precursor's adduct is of interest
        if (!tranGroup.PrecursorAdduct.Equals(nodeGroupLight.TransitionGroup.PrecursorAdduct) ||
            tranGroup.LabelType.IsLight)
        {
            continue;
        }

        TransitionDocNode[] transitions = nodePep.GetMatchingTransitions(tranGroup, settings, mods);
        var nodeGroup = new TransitionGroupDocNode(tranGroup, transitions);
        nodeGroup = nodeGroup.ChangeSettings(settings, nodePep, mods, SrmSettingsDiff.ALL);
        docStudy7 = (SrmDocument)docStudy7.Add(pepPath, nodeGroup);
        break;
    }
    AssertEx.IsDocumentState(docStudy7, 12, 7, 11, 22, 66);

    docMods = docStudy7.Settings.PeptideSettings.Modifications;
    Assert.AreEqual(2, docMods.AllHeavyModifications.Count(mod => mod.IsExplicit && mod.Label13C));
    Assert.AreEqual(2, docMods.AllHeavyModifications.Count(mod => mod.Formula != null));
    Assert.AreEqual(3, docStudy7.Peptides.Count(peptide => peptide.HasExplicitMods));
    Assert.AreEqual(2, docStudy7.Peptides.Count(peptide => peptide.HasExplicitMods &&
        peptide.ExplicitMods.StaticModifications.Count > 0 &&
        peptide.ExplicitMods.StaticModifications[0].Modification.AAs[0] == 'C'));
    Assert.AreEqual(2, docStudy7.Peptides.Count(peptide => peptide.HasExplicitMods &&
        peptide.ExplicitMods.HeavyModifications[0].Modification.AAs[0] == 'V' &&
        peptide.ExplicitMods.HeavyModifications[0].Modification.Label13C));
    Assert.AreEqual(1, docStudy7.Peptides.Count(peptide => peptide.HasExplicitMods &&
        peptide.ExplicitMods.HeavyModifications[0].Modification.AAs[0] == 'L' &&
        peptide.ExplicitMods.HeavyModifications[0].Modification.Label13C));

    // After the round trip the export must match the original exactly
    AssertEx.NoDiff(transitionList, ExportCsv(docStudy7));
}
/// <summary>
/// Adds the rows of the peptide grid to the document as peptides, creating
/// peptide groups as needed.
/// </summary>
/// <param name="document">Document to add to.</param>
/// <param name="validating">When true, the Unimod confirmation prompt and the
/// final update of the document modifications are skipped.</param>
/// <param name="selectedPath">Insertion point; replaced with the path of any
/// peptide group created here.</param>
/// <returns>The input document when the peptide list tab is not selected, null
/// when an error was shown or the user cancelled, otherwise the changed document.</returns>
private SrmDocument AddPeptides(SrmDocument document, bool validating, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPagePeptideList)
        return document;

    var matcher = new ModificationMatcher();
    var listPeptideSequences = ListPeptideSequences();
    if (listPeptideSequences == null)
        return null;

    try
    {
        matcher.CreateMatches(document.Settings, listPeptideSequences,
            Settings.Default.StaticModList, Settings.Default.HeavyModList);
    }
    catch (FormatException e)
    {
        MessageDlg.ShowException(this, e);
        ShowPeptideError(new PasteError
        {
            Column = colPeptideSequence.Index,
            Message = Resources.PasteDlg_AddPeptides_Unable_to_interpret_peptide_modifications
        });
        return null;
    }

    var strNameMatches = matcher.FoundMatches;
    if (!validating && !string.IsNullOrEmpty(strNameMatches))
    {
        string message = TextUtil.LineSeparate(
            Resources.PasteDlg_AddPeptides_Would_you_like_to_use_the_Unimod_definitions_for_the_following_modifications,
            string.Empty,
            strNameMatches);
        if (MultiButtonMsgDlg.Show(this, message, Resources.PasteDlg_AddPeptides_OK) == DialogResult.Cancel)
            return null;
    }

    var backgroundProteome = GetBackgroundProteome(document);

    // Insert last to first so that proteins get inserted on top of each other
    // in the order they are added. Peptide insertion into peptide lists needs
    // to be carefully tracked to insert them in the order they are listed in
    // the grid.
    int lastGroupGlobalIndex = 0, lastPeptideIndex = -1;
    for (int rowIndex = gridViewPeptides.Rows.Count - 1; rowIndex >= 0; rowIndex--)
    {
        var row = gridViewPeptides.Rows[rowIndex];
        var pepModSequence = Convert.ToString(row.Cells[colPeptideSequence.Index].Value);
        pepModSequence = FastaSequence.NormalizeNTerminalMod(pepModSequence);
        var proteinName = Convert.ToString(row.Cells[colPeptideProtein.Index].Value);
        bool hasProteinName = !string.IsNullOrEmpty(proteinName);

        // Completely blank rows are ignored
        if (string.IsNullOrEmpty(pepModSequence) && !hasProteinName)
            continue;

        // Look for an existing group: by name when one was given, otherwise
        // the selected group, but only if it is a peptide list.
        PeptideGroupDocNode peptideGroupDocNode;
        if (hasProteinName)
        {
            peptideGroupDocNode = FindPeptideGroupDocNode(document, proteinName);
        }
        else
        {
            peptideGroupDocNode = GetSelectedPeptideGroupDocNode(document, selectedPath);
            if (!IsPeptideListDocNode(peptideGroupDocNode))
                peptideGroupDocNode = null;
        }

        if (peptideGroupDocNode == null)
        {
            if (!hasProteinName)
            {
                peptideGroupDocNode = new PeptideGroupDocNode(new PeptideGroup(),
                    document.GetPeptideGroupId(true), null, new PeptideDocNode[0]);
            }
            else
            {
                // Use the background proteome entry when there is one,
                // otherwise fall back to an empty peptide group.
                ProteinMetadata metadata = null;
                PeptideGroup peptideGroup = null;
                if (!backgroundProteome.IsNone)
                    peptideGroup = backgroundProteome.GetFastaSequence(proteinName, out metadata);
                if (peptideGroup == null)
                    peptideGroup = new PeptideGroup();

                if (metadata != null)
                {
                    peptideGroupDocNode = new PeptideGroupDocNode(peptideGroup, metadata, new PeptideDocNode[0]);
                }
                else
                {
                    peptideGroupDocNode = new PeptideGroupDocNode(peptideGroup, proteinName,
                        peptideGroup.Description, new PeptideDocNode[0]);
                }
            }

            // Add to the end, if no insert node
            if (selectedPath == null || selectedPath.Depth < (int)SrmDocument.Level.MoleculeGroups)
            {
                document = (SrmDocument)document.Add(peptideGroupDocNode);
            }
            else
            {
                Identity toId = selectedPath.GetIdentity((int)SrmDocument.Level.MoleculeGroups);
                document = (SrmDocument)document.Insert(toId, peptideGroupDocNode);
            }
            selectedPath = new IdentityPath(peptideGroupDocNode.Id);
        }

        // Working copy of the group's current peptides
        var listPeptides = peptideGroupDocNode.Children.Cast<PeptideDocNode>().ToList();

        var fastaSequence = peptideGroupDocNode.PeptideGroup as FastaSequence;
        if (fastaSequence != null)
        {
            // Attempt to create node for error checking.
            var nodePepCheck = fastaSequence.CreateFullPeptideDocNode(document.Settings,
                FastaSequence.StripModifications(pepModSequence));
            if (nodePepCheck == null)
            {
                ShowPeptideError(new PasteError
                {
                    Column = colPeptideSequence.Index,
                    Line = rowIndex,
                    Message = Resources.PasteDlg_AddPeptides_This_peptide_sequence_was_not_found_in_the_protein_sequence
                });
                return null;
            }
        }

        // Create node using ModificationMatcher.
        PeptideDocNode nodePepNew = matcher.GetModifiedNode(pepModSequence, fastaSequence)
            .ChangeSettings(document.Settings, SrmSettingsDiff.ALL);

        // Avoid adding an existing peptide a second time.
        if (listPeptides.Contains(nodePep => Equals(nodePep.Key, nodePepNew.Key)))
            continue;

        if (nodePepNew.Peptide.FastaSequence != null)
        {
            listPeptides.Add(nodePepNew);
            listPeptides.Sort(FastaSequence.ComparePeptides);
        }
        else
        {
            // Rows are visited bottom-up, so within one group each new peptide
            // goes in front of the ones added from later rows.
            int groupGlobalIndex = peptideGroupDocNode.PeptideGroup.GlobalIndex;
            if (groupGlobalIndex == lastGroupGlobalIndex && lastPeptideIndex != -1)
            {
                listPeptides.Insert(lastPeptideIndex, nodePepNew);
            }
            else
            {
                lastPeptideIndex = listPeptides.Count;
                listPeptides.Add(nodePepNew);
            }
            lastGroupGlobalIndex = groupGlobalIndex;
        }

        var newPeptideGroupDocNode = new PeptideGroupDocNode(peptideGroupDocNode.PeptideGroup,
            peptideGroupDocNode.Annotations, peptideGroupDocNode.Name, peptideGroupDocNode.Description,
            listPeptides.ToArray(), false);
        document = (SrmDocument)document.ReplaceChild(newPeptideGroupDocNode);
    }

    if (!validating && listPeptideSequences.Count > 0)
    {
        var pepModsNew = matcher.GetDocModifications(document);
        document = document.ChangeSettings(document.Settings.ChangePeptideModifications(mods => pepModsNew));
        document.Settings.UpdateDefaultModifications(false);
    }
    return document;
}
/// <summary>
/// Adds the rows of the transition list grid to the document. In molecule mode
/// each row is merged into the molecule group / molecule / precursor tree; in
/// peptide mode the rows are turned into separated text and run through
/// <c>MassListImporter</c>, and the resulting groups are merged or inserted.
/// </summary>
/// <param name="document">Document to add to.</param>
/// <param name="selectedPath">Insertion point; replaced with the path of any
/// peptide group inserted in peptide mode.</param>
/// <returns>The input document when the transition list tab is not selected or
/// there is nothing to import, null when an error was shown, otherwise the
/// changed document.</returns>
private SrmDocument AddTransitionList(SrmDocument document, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPageTransitionList)
        return document;

    if (IsMolecule)
    {
        // Save the current column order to settings: names of the visible
        // columns in display order. DataGridView display indexes are unique and
        // contiguous, so ordering by DisplayIndex visits each column once.
        Settings.Default.CustomMoleculeTransitionInsertColumnsList = gridViewTransitionList.Columns
            .Cast<DataGridViewColumn>()
            .OrderBy(col => col.DisplayIndex)
            .Where(col => col.Visible)
            .Select(col => col.Name)
            .ToList();

        // We will accept a completely empty product list as meaning
        // "these are all precursor transitions"
        var requireProductInfo = false;
        for (var i = 0; i < gridViewTransitionList.RowCount - 1; i++)
        {
            var row = gridViewTransitionList.Rows[i];
            var productMz = row.Cells[INDEX_PRODUCT_MZ].Value;
            var productFormula = row.Cells[INDEX_PRODUCT_FORMULA].Value;
            var productCharge = row.Cells[INDEX_PRODUCT_CHARGE].Value;
            if ((productMz != null && productMz.ToString().Length > 0) ||
                (productFormula != null && productFormula.ToString().Length > 0) ||
                (productCharge != null && productCharge.ToString().Length > 0))
            {
                requireProductInfo = true; // Product list is not completely empty
                break;
            }
        }

        // For each row in the grid, add to or begin MoleculeGroup|Molecule|TransitionList tree
        for (int i = 0; i < gridViewTransitionList.RowCount - 1; i++)
        {
            DataGridViewRow row = gridViewTransitionList.Rows[i];
            var precursor = ReadPrecursorOrProductColumns(document, row, true); // Get molecule values
            if (precursor == null)
                return null;
            if (requireProductInfo && ReadPrecursorOrProductColumns(document, row, false) == null)
            {
                return null;
            }
            var charge = precursor.Charge;
            var precursorMonoMz = BioMassCalc.CalculateIonMz(precursor.MonoMass, charge);
            var precursorAverageMz = BioMassCalc.CalculateIonMz(precursor.AverageMass, charge);
            var mzMatchTolerance = document.Settings.TransitionSettings.Instrument.MzMatchTolerance;

            // Preexisting molecule group?
            bool pepGroupFound = false;
            foreach (var pepGroup in document.MoleculeGroups)
            {
                var pathPepGroup = new IdentityPath(pepGroup.Id);
                if (Equals(pepGroup.Name, Convert.ToString(row.Cells[INDEX_MOLECULE_GROUP].Value)))
                {
                    // Found a molecule group with the same name - can we find an existing transition group to which we can add a transition?
                    pepGroupFound = true;
                    bool pepFound = false;
                    foreach (var pep in pepGroup.SmallMolecules)
                    {
                        var pepPath = new IdentityPath(pathPepGroup, pep.Id);
                        var ionMonoMz = BioMassCalc.CalculateIonMz(pep.CustomIon.MonoisotopicMass, charge);
                        var ionAverageMz = BioMassCalc.CalculateIonMz(pep.CustomIon.AverageMass, charge);
                        // Match existing molecule if same name (if any) and same formula (if any) and similar m/z at the precursor charge
                        // (we don't just check mass since we don't have a tolerance value for that)
                        // Or same name (if any) and identical formula when stripped of labels
                        // Or same name, no formula, and different isotope labels
                        if (Equals(pep.CustomIon.Name, precursor.Name) &&
                            ((Equals(pep.CustomIon.Formula, precursor.Formula) &&
                              Math.Abs(ionMonoMz - precursorMonoMz) <= mzMatchTolerance &&
                              Math.Abs(ionAverageMz - precursorAverageMz) <= mzMatchTolerance) ||
                             (!Equals(pep.CustomIon.Formula, precursor.Formula) &&
                              Equals(pep.CustomIon.UnlabeledFormula, BioMassCalc.MONOISOTOPIC.StripLabelsFromFormula(precursor.Formula))) ||
                             (string.IsNullOrEmpty(pep.CustomIon.Formula) &&
                              string.IsNullOrEmpty(precursor.Formula) &&
                              !pep.TransitionGroups.Any(t => Equals(t.TransitionGroup.LabelType, precursor.IsotopeLabelType ?? IsotopeLabelType.light)))))
                        {
                            pepFound = true;
                            bool tranGroupFound = false;
                            foreach (var tranGroup in pep.TransitionGroups)
                            {
                                var pathGroup = new IdentityPath(pepPath, tranGroup.Id);
                                if (Math.Abs(tranGroup.PrecursorMz - precursor.Mz) <= mzMatchTolerance)
                                {
                                    tranGroupFound = true;
                                    var tranFound = false;
                                    try
                                    {
                                        var tranNode = GetMoleculeTransition(document, row, pep.Peptide, tranGroup.TransitionGroup, requireProductInfo);
                                        if (tranNode == null)
                                            return null;
                                        foreach (var tran in tranGroup.Transitions)
                                        {
                                            if (Equals(tranNode.Transition.CustomIon, tran.Transition.CustomIon))
                                            {
                                                tranFound = true;
                                                break;
                                            }
                                        }
                                        // Only add the transition if the precursor does not already have it
                                        if (!tranFound)
                                        {
                                            document = (SrmDocument) document.Add(pathGroup, tranNode);
                                        }
                                    }
                                    catch (InvalidDataException e)
                                    {
                                        // Some error we didn't catch in the basic checks
                                        ShowTransitionError(new PasteError
                                        {
                                            Column = 0,
                                            Line = row.Index,
                                            Message = e.Message
                                        });
                                        return null;
                                    }
                                    break;
                                }
                            }
                            if (!tranGroupFound)
                            {
                                var node = GetMoleculeTransitionGroup(document, row, pep.Peptide, requireProductInfo);
                                if (node == null)
                                    return null;
                                document = (SrmDocument) document.Add(pepPath, node);
                            }
                            break;
                        }
                    }
                    if (!pepFound)
                    {
                        var node = GetMoleculePeptide(document, row, pepGroup.PeptideGroup, requireProductInfo);
                        if (node == null)
                            return null;
                        document = (SrmDocument) document.Add(pathPepGroup, node);
                    }
                    break;
                }
            }
            if (!pepGroupFound)
            {
                var node = GetMoleculePeptideGroup(document, row, requireProductInfo);
                if (node == null)
                    return null;
                IdentityPath first;
                IdentityPath next;
                document = document.AddPeptideGroups(new[] {node}, false, null, out first, out next);
            }
        }
    }
    else
    {
        var backgroundProteome = GetBackgroundProteome(document);
        var sbTransitionList = new StringBuilder();
        // Grid row index for each line appended to sbTransitionList, so importer
        // line numbers can be mapped back to the grid even when blank rows are skipped.
        var gridRowIndices = new List<int>();
        var dictNameSeq = new Dictionary<string, FastaSequence>();
        // Add all existing FASTA sequences in the document to the name to seq dictionary
        // Including named peptide lists would cause the import code to give matching names
        // in this list new names (e.g. with 1, 2, 3 appended).  In this code, the names
        // are intended to be merged.
        foreach (var nodePepGroup in document.Children.Cast<PeptideGroupDocNode>().Where(n => !n.IsPeptideList))
        {
            if (!dictNameSeq.ContainsKey(nodePepGroup.Name))
                dictNameSeq.Add(nodePepGroup.Name, (FastaSequence) nodePepGroup.PeptideGroup);
        }

        // Check for simple errors and build strings for import
        for (int i = 0; i < gridViewTransitionList.Rows.Count; i++)
        {
            var row = gridViewTransitionList.Rows[i];
            var peptideSequence = Convert.ToString(row.Cells[colTransitionPeptide.Index].Value);
            var proteinName = Convert.ToString(row.Cells[colTransitionProteinName.Index].Value);
            var precursorMzText = Convert.ToString(row.Cells[colTransitionPrecursorMz.Index].Value);
            var productMzText = Convert.ToString(row.Cells[colTransitionProductMz.Index].Value);
            if (string.IsNullOrEmpty(peptideSequence) && string.IsNullOrEmpty(proteinName))
            {
                continue;
            }
            if (string.IsNullOrEmpty(peptideSequence))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPeptide.Index,
                    Line = i,
                    Message = Resources.PasteDlg_ListPeptideSequences_The_peptide_sequence_cannot_be_blank
                });
                return null;
            }
            if (!FastaSequence.IsExSequence(peptideSequence))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPeptide.Index,
                    Line = i,
                    Message = Resources.PasteDlg_ListPeptideSequences_This_peptide_sequence_contains_invalid_characters
                });
                return null;
            }
            double mz;
            if (!double.TryParse(precursorMzText, out mz))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPrecursorMz.Index,
                    Line = i,
                    Message = Resources.PasteDlg_AddTransitionList_The_precursor_m_z_must_be_a_number_
                });
                return null;
            }
            if (!double.TryParse(productMzText, out mz))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionProductMz.Index,
                    Line = i,
                    Message = Resources.PasteDlg_AddTransitionList_The_product_m_z_must_be_a_number_
                });
                return null;
            }

            const char sep = TRANSITION_LIST_SEPARATOR;
            // Add columns in order specified by TRANSITION_LIST_COL_INDICES
            sbTransitionList
                .Append(proteinName).Append(sep)
                .Append(peptideSequence).Append(sep)
                .Append(precursorMzText).Append(sep)
                .Append(productMzText).AppendLine();
            gridRowIndices.Add(i);

            // Build FASTA sequence text in cases where it is known
            if (!dictNameSeq.ContainsKey(proteinName))
            {
                var fastaSeq = backgroundProteome.GetFastaSequence(proteinName);
                if (fastaSeq != null)
                    dictNameSeq.Add(proteinName, fastaSeq);
            }
        }

        if (sbTransitionList.Length == 0)
            return document;

        // Do the actual import into PeptideGroupDocNodes
        IEnumerable<PeptideGroupDocNode> peptideGroupDocNodes;
        try
        {
            List<TransitionImportErrorInfo> errorList;
            List<MeasuredRetentionTime> irtPeptides;
            List<SpectrumMzInfo> librarySpectra;
            var inputs = new MassListInputs(sbTransitionList.ToString(), LocalizationHelper.CurrentCulture, TRANSITION_LIST_SEPARATOR);
            var importer = new MassListImporter(document, inputs);
            // TODO: support long-wait broker
            peptideGroupDocNodes = importer.Import(null,
                TRANSITION_LIST_COL_INDICES,
                dictNameSeq,
                out irtPeptides,
                out librarySpectra,
                out errorList);
            if (errorList.Any())
            {
                // Only the first error is reported, through the catch blocks below
                var firstError = errorList[0];
                if (firstError.Row.HasValue)
                {
                    throw new LineColNumberedIoException(firstError.ErrorMessage, firstError.Row.Value, firstError.Column ?? -1);
                }
                else
                {
                    throw new InvalidDataException(firstError.ErrorMessage);
                }
            }
        }
        catch (LineColNumberedIoException x)
        {
            // Grid columns in the order they were written to the import text above
            var columns = new[]
            {
                colTransitionProteinName,
                colTransitionPeptide,
                colTransitionPrecursorMz,
                colTransitionProductMz
            };
            // Fall back to the first grid column for an unknown or out-of-range column
            int columnIndex = x.ColumnIndex >= 0 && x.ColumnIndex < columns.Length
                ? columns[x.ColumnIndex].Index
                : 0;
            // Translate the line in the import text back to the grid row it came from
            int textLine = (int) x.LineNumber - 1;
            int gridLine = textLine >= 0 && textLine < gridRowIndices.Count
                ? gridRowIndices[textLine]
                : textLine;

            ShowTransitionError(new PasteError
            {
                Column = columnIndex,
                Line = gridLine,
                Message = x.PlainMessage
            });
            return null;
        }
        catch (InvalidDataException x)
        {
            ShowTransitionError(new PasteError
            {
                Message = x.Message
            });
            return null;
        }

        // Insert the resulting nodes into the document tree, merging when possible
        bool after = false;
        foreach (var nodePepGroup in peptideGroupDocNodes)
        {
            PeptideGroupDocNode nodePepGroupExist = FindPeptideGroupDocNode(document, nodePepGroup);
            if (nodePepGroupExist != null)
            {
                var nodePepGroupNew = nodePepGroupExist.Merge(nodePepGroup);
                if (!ReferenceEquals(nodePepGroupExist, nodePepGroupNew))
                    document = (SrmDocument) document.ReplaceChild(nodePepGroupNew);
            }
            else
            {
                // Add to the end, if no insert node
                var to = selectedPath;
                if (to == null || to.Depth < (int) SrmDocument.Level.MoleculeGroups)
                    document = (SrmDocument) document.Add(nodePepGroup);
                else
                {
                    Identity toId = selectedPath.GetIdentity((int) SrmDocument.Level.MoleculeGroups);
                    document = (SrmDocument) document.Insert(toId, nodePepGroup, after);
                }
                selectedPath = new IdentityPath(nodePepGroup.Id);
                // All future insertions should be after, to avoid reversing the list
                after = true;
            }
        }
    }
    return document;
}
/// <summary>
/// Adds the rows of the protein grid to the document as protein groups. Rows
/// are processed last to first. Each row's sequence comes from the pasted
/// sequence cell and/or the background proteome.
/// </summary>
/// <param name="document">Document to add to.</param>
/// <param name="selectedPath">Insertion point; replaced with the path of each
/// protein group added.</param>
/// <returns>The input document when the protein list tab is not selected, null
/// when an error was shown, otherwise the changed document.</returns>
private SrmDocument AddProteins(SrmDocument document, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPageProteinList)
        return document;

    var backgroundProteome = GetBackgroundProteome(document);
    for (int i = gridViewProteins.Rows.Count - 1; i >= 0; i--)
    {
        var row = gridViewProteins.Rows[i];
        var proteinName = Convert.ToString(row.Cells[colProteinName.Index].Value);
        if (string.IsNullOrEmpty(proteinName))
        {
            continue;
        }
        var pastedMetadata = new ProteinMetadata(proteinName,
            Convert.ToString(row.Cells[colProteinDescription.Index].Value),
            NullForEmpty(Convert.ToString(row.Cells[colProteinPreferredName.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinAccession.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinGene.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinSpecies.Index].Value)));
        FastaSequence fastaSequence = null;
        if (!backgroundProteome.IsNone)
        {
            ProteinMetadata protdbMetadata;
            fastaSequence = backgroundProteome.GetFastaSequence(proteinName, out protdbMetadata);
            // Fill in any gaps in pasted metadata with that in protdb
            pastedMetadata = pastedMetadata.Merge(protdbMetadata);
        }
        // Strip any whitespace (tab, newline etc) in case it was copied out of a FASTA file
        var fastaSequenceString = new string(Convert.ToString(row.Cells[colProteinSequence.Index].Value)
            .Where(c => !Char.IsWhiteSpace(c)).ToArray());
        if (!string.IsNullOrEmpty(fastaSequenceString))
        {
            try
            {
                if (fastaSequence == null) // Didn't match anything in protdb
                {
                    fastaSequence = new FastaSequence(pastedMetadata.Name, pastedMetadata.Description,
                        new ProteinMetadata[0], fastaSequenceString);
                }
                else
                {
                    // A pasted sequence that differs from the proteome entry wins
                    if (fastaSequence.Sequence != fastaSequenceString)
                    {
                        fastaSequence = new FastaSequence(pastedMetadata.Name, pastedMetadata.Description,
                            fastaSequence.Alternatives, fastaSequenceString);
                    }
                }
            }
            catch (Exception exception)
            {
                // Report against the sequence cell, which holds the offending value
                ShowProteinError(new PasteError
                {
                    Line = i,
                    Column = colProteinSequence.Index,
                    Message = string.Format(Resources.PasteDlg_AddProteins_Invalid_protein_sequence__0__, exception.Message)
                });
                return null;
            }
        }
        if (fastaSequence == null)
        {
            ShowProteinError(
                new PasteError
                {
                    Line = i,
                    Message = backgroundProteome.IsNone
                        ? Resources.PasteDlg_AddProteins_Missing_protein_sequence
                        : Resources.PasteDlg_AddProteins_This_protein_was_not_found_in_the_background_proteome_database
                });
            return null;
        }
        var description = pastedMetadata.Description;
        if (!string.IsNullOrEmpty(description) && description != fastaSequence.Description)
        {
            fastaSequence = new FastaSequence(fastaSequence.Name, description, fastaSequence.Alternatives, fastaSequence.Sequence);
        }
        pastedMetadata = pastedMetadata.ChangeName(fastaSequence.Name).ChangeDescription(fastaSequence.Description); // Make sure these agree
        var nodeGroupPep = new PeptideGroupDocNode(fastaSequence, pastedMetadata, new PeptideDocNode[0]);
        nodeGroupPep = nodeGroupPep.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);

        // Add to the end, if no insert node
        var to = selectedPath;
        if (to == null || to.Depth < (int)SrmDocument.Level.MoleculeGroups)
            document = (SrmDocument)document.Add(nodeGroupPep);
        else
        {
            Identity toId = selectedPath.GetIdentity((int)SrmDocument.Level.MoleculeGroups);
            document = (SrmDocument)document.Insert(toId, nodeGroupPep);
        }
        selectedPath = new IdentityPath(nodeGroupPep.Id);
    }
    return document;
}
/// <summary>
/// Creates decoy peptides from the document's peptides, visited in random order,
/// and appends them to the document as one decoy peptide group.
/// </summary>
/// <param name="document">Document whose peptides are the decoy sources.</param>
/// <param name="numDecoys">Number of decoys requested.</param>
/// <param name="multiCycle">When true, further passes are made over the peptides
/// for as long as a pass adds at least one decoy and more are still wanted.</param>
/// <param name="genDecoySequence">Optional transformation applied to each source
/// sequence and its modifications; null leaves them as they are.</param>
/// <returns>A document with the decoy peptide group added.</returns>
private static SrmDocument GenerateDecoysFunc(SrmDocument document, int numDecoys, bool multiCycle, Func<SequenceMods, SequenceMods> genDecoySequence)
{
    var settings = document.Settings;
    var enzyme = settings.PeptideSettings.Enzyme;
    var decoyNodePepList = new List<PeptideDocNode>();
    var setDecoyKeys = new HashSet<PeptideModKey>();

    // Loop through the existing tree in random order creating decoys
    bool keepCycling = true;
    while (numDecoys > 0 && keepCycling)
    {
        int startDecoys = numDecoys;
        foreach (var nodePep in document.Peptides.ToArray().RandomOrder())
        {
            if (numDecoys == 0)
                break;

            // Decoys should not be based on standard peptides
            if (nodePep.GlobalStandardType != null)
                continue;

            // If the non-terminal end of the peptide sequence is all a single character, skip this peptide,
            // since it can't support decoy generation.
            var sourcePeptide = nodePep.Peptide;
            var sourceSequence = sourcePeptide.Sequence;
            if (genDecoySequence != null)
            {
                var nonTerminalPart = sourceSequence.Substring(0, sourceSequence.Length - 1);
                if (nonTerminalPart.Distinct().Count() == 1)
                    continue;
            }

            var seqMods = new SequenceMods(nodePep);
            if (genDecoySequence != null)
                seqMods = genDecoySequence(seqMods);

            var decoyPeptide = new Peptide(null, seqMods.Sequence, null, null,
                enzyme.CountCleavagePoints(seqMods.Sequence), true);
            if (seqMods.Mods != null)
                seqMods.Mods = seqMods.Mods.ChangePeptide(decoyPeptide);

            // True when the decoy keeps the source sequence
            bool sequenceUnchanged = Equals(seqMods.Sequence, sourcePeptide.Sequence);

            foreach (var comparableGroups in PeakFeatureEnumerator.ComparableGroups(nodePep))
            {
                var decoyNodeTranGroupList = GetDecoyGroups(nodePep, decoyPeptide, seqMods.Mods,
                    comparableGroups, document, sequenceUnchanged);
                if (decoyNodeTranGroupList.Count == 0)
                    continue;

                var nodePepNew = new PeptideDocNode(decoyPeptide, settings, seqMods.Mods, null,
                    nodePep.ExplicitRetentionTime, decoyNodeTranGroupList.ToArray(), false);

                // Record which peptide the decoy came from when its modified sequence differs
                if (!Equals(nodePep.ModifiedSequence, nodePepNew.ModifiedSequence))
                {
                    var sourceKey = new ModifiedSequenceMods(nodePep.ModifiedSequence, nodePep.ExplicitMods);
                    nodePepNew = nodePepNew.ChangeSourceKey(sourceKey);
                }

                // Avoid adding duplicate peptides
                if (!setDecoyKeys.Add(nodePepNew.Key))
                    continue;

                decoyNodePepList.Add(nodePepNew);
                numDecoys--;
            }
        }

        // Stop if not multi-cycle or the number of decoys has not changed.
        keepCycling = multiCycle && startDecoys != numDecoys;
    }

    var decoyNodePepGroup = new PeptideGroupDocNode(new PeptideGroup(true), Annotations.EMPTY,
        PeptideGroup.DECOYS, null, decoyNodePepList.ToArray(), false);
    decoyNodePepGroup = decoyNodePepGroup.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);

    return (SrmDocument)document.Add(decoyNodePepGroup);
}
/// <summary>
/// Builds a copy of the document in which every proteomic group is re-expressed
/// with custom-ion (small molecule) nodes. Groups that are already non-proteomic
/// are copied over as they are.
/// </summary>
/// <param name="document">Document to convert.</param>
/// <param name="mode">How converted ions are described: <c>none</c> returns the
/// document untouched; <c>masses_and_names</c> replaces each transition ion with a
/// mass-plus-name ion; <c>masses_only</c> replaces it with a mass-only ion; any
/// other mode keeps the ion as built.</param>
/// <param name="invertCharges">When true, precursor and transition charges are
/// negated in the converted nodes.</param>
/// <param name="ignoreDecoys">When true, decoy transition groups are skipped
/// instead of causing an exception.</param>
/// <returns>The converted document, with retention time prediction cleared.</returns>
/// <exception cref="Exception">A decoy transition group was found and
/// <paramref name="ignoreDecoys"/> is false.</exception>
public SrmDocument ConvertToSmallMolecules(SrmDocument document, ConvertToSmallMoleculesMode mode = ConvertToSmallMoleculesMode.formulas, bool invertCharges = false, bool ignoreDecoys=false)
{
    if (mode == ConvertToSmallMoleculesMode.none)
        return document;

    // Factors applied wherever the original repeated the invertCharges test
    int chargeSign = invertCharges ? -1 : 1;
    // We removed hydrogen rather than added
    double invertedMzShift = invertCharges ? 2.0 * BioMassCalc.MassProton : 0;

    var newdoc = new SrmDocument(document.Settings);
    // Mark this as a testing node so we don't sort it
    var note = new Annotations(TestingConvertedFromProteomic, null, 1);

    // Retain copied results
    newdoc = (SrmDocument)newdoc.ChangeIgnoreChangingChildren(true);

    foreach (var peptideGroupDocNode in document.MoleculeGroups)
    {
        if (!peptideGroupDocNode.IsProteomic)
        {
            // Already a small molecule
            newdoc = (SrmDocument)newdoc.Add(peptideGroupDocNode);
            continue;
        }

        var newPeptideGroup = new PeptideGroup();
        var newPeptideGroupDocNode = new PeptideGroupDocNode(newPeptideGroup,
            peptideGroupDocNode.Annotations.Merge(note), peptideGroupDocNode.Name,
            peptideGroupDocNode.Description, new PeptideDocNode[0], peptideGroupDocNode.AutoManageChildren);

        foreach (var mol in peptideGroupDocNode.Molecules)
        {
            var peptideSequence = mol.Peptide.Sequence;

            // Create a PeptideDocNode with the presumably baseline charge and label
            var firstGroup = mol.TransitionGroups.FirstOrDefault();
            var precursorCharge = (firstGroup != null ? firstGroup.TransitionGroup.PrecursorCharge : 0) * chargeSign;
            var isotopeLabelType = firstGroup != null ? firstGroup.TransitionGroup.LabelType : IsotopeLabelType.light;
            var moleculeCustomIon = ConvertToSmallMolecule(mode, document, mol, precursorCharge, isotopeLabelType);
            var precursorCustomIon = moleculeCustomIon;
            var newPeptide = new Peptide(moleculeCustomIon);
            var newPeptideDocNode = new PeptideDocNode(newPeptide, newdoc.Settings, null, null,
                null, null, mol.ExplicitRetentionTime, note, mol.Results, new TransitionGroupDocNode[0],
                mol.AutoManageChildren);

            foreach (var transitionGroupDocNode in mol.TransitionGroups)
            {
                if (transitionGroupDocNode.IsDecoy)
                {
                    if (ignoreDecoys)
                        continue;
                    throw new Exception("There is no translation from decoy to small molecules"); // Not L10N
                }

                var sourceGroup = transitionGroupDocNode.TransitionGroup;
                if (sourceGroup.PrecursorCharge != Math.Abs(precursorCharge) ||
                    !Equals(isotopeLabelType, sourceGroup.LabelType))
                {
                    // Different charges or labels mean different ion formulas
                    precursorCharge = sourceGroup.PrecursorCharge * chargeSign;
                    isotopeLabelType = sourceGroup.LabelType;
                    precursorCustomIon = ConvertToSmallMolecule(mode, document, mol, precursorCharge, isotopeLabelType);
                }

                var newTransitionGroup = new TransitionGroup(newPeptide, precursorCustomIon, precursorCharge, isotopeLabelType);
                // Remove any library info, since for the moment at least small molecules don't support this and it won't roundtrip
                var resultsNew = RemoveTransitionGroupChromInfoLibraryInfo(transitionGroupDocNode);
                var newTransitionGroupDocNode = new TransitionGroupDocNode(newTransitionGroup,
                    transitionGroupDocNode.Annotations.Merge(note), document.Settings,
                    null, null, transitionGroupDocNode.ExplicitValues, resultsNew, null,
                    transitionGroupDocNode.AutoManageChildren);

                // The converted precursor m/z may differ from the source only by the electron mass allowance
                var electronAllowance = Math.Abs(sourceGroup.PrecursorCharge * BioMassCalc.MassElectron);
                var precursorMzDelta = Math.Abs(newTransitionGroupDocNode.PrecursorMz + invertedMzShift - transitionGroupDocNode.PrecursorMz);
                Assume.IsTrue((precursorMzDelta - electronAllowance) <= 1E-5);

                foreach (var transition in transitionGroupDocNode.Transitions)
                {
                    double mass = 0;
                    var transitionCharge = transition.Transition.Charge * chargeSign;
                    var ionType = IonType.custom;
                    CustomIon transitionCustomIon;
                    double mzShiftTransition = 0;

                    switch (transition.Transition.IonType)
                    {
                        case IonType.precursor:
                        {
                            ionType = IonType.precursor;
                            // Masses are passed explicitly only when there is no formula
                            bool noFormula = string.IsNullOrEmpty(precursorCustomIon.Formula);
                            transitionCustomIon = new DocNodeCustomIon(precursorCustomIon.Formula,
                                noFormula ? precursorCustomIon.MonoisotopicMass : (double?) null,
                                noFormula ? precursorCustomIon.AverageMass : (double?) null,
                                SmallMoleculeNameFromPeptide(peptideSequence, transitionCharge));
                            mzShiftTransition = invertedMzShift;
                            break;
                        }
                        case IonType.custom:
                            transitionCustomIon = transition.Transition.CustomIon;
                            mass = transitionCustomIon.MonoisotopicMass;
                            break;
                        default:
                            // TODO - try to get fragment formula?
                            mass = BioMassCalc.CalculateIonMassFromMz(transition.Mz, transition.Transition.Charge);
                            // We can't really get at mono vs average mass from m/z, but for test purposes this is fine
                            transitionCustomIon = new DocNodeCustomIon(mass, mass, transition.Transition.FragmentIonName);
                            break;
                    }

                    switch (mode)
                    {
                        case ConvertToSmallMoleculesMode.masses_and_names:
                            // Discard the formula if we're testing the use of mass-with-names (for matching in ratio calcs) target specification
                            transitionCustomIon = new DocNodeCustomIon(transitionCustomIon.MonoisotopicMass,
                                transitionCustomIon.AverageMass, transition.Transition.FragmentIonName);
                            break;
                        case ConvertToSmallMoleculesMode.masses_only:
                            // Discard the formula and name if we're testing the use of mass-only target specification
                            transitionCustomIon = new DocNodeCustomIon(transitionCustomIon.MonoisotopicMass,
                                transitionCustomIon.AverageMass);
                            break;
                    }

                    var newTransition = new Transition(newTransitionGroup, ionType,
                        null, transition.Transition.MassIndex, transitionCharge, null,
                        transitionCustomIon);
                    if (ionType == IonType.precursor)
                    {
                        mass = document.Settings.GetFragmentMass(sourceGroup.LabelType, null, newTransition,
                            newTransitionGroupDocNode.IsotopeDist);
                    }

                    var newTransitionDocNode = new TransitionDocNode(newTransition, transition.Annotations.Merge(note),
                        null, mass, transition.IsotopeDistInfo, null,
                        transition.Results);

                    // Same tolerance check as for the precursor, now per transition
                    var transitionMzDelta = Math.Abs(newTransitionDocNode.Mz + mzShiftTransition - transition.Mz);
                    Assume.IsTrue((transitionMzDelta - electronAllowance) <= 1E-5,
                        String.Format("unexpected mz difference {0}-{1}={2}", newTransitionDocNode.Mz, transition.Mz, newTransitionDocNode.Mz - transition.Mz)); // Not L10N

                    newTransitionGroupDocNode = (TransitionGroupDocNode)newTransitionGroupDocNode.Add(newTransitionDocNode);
                }

                if (newPeptideDocNode != null)
                    newPeptideDocNode = (PeptideDocNode)newPeptideDocNode.Add(newTransitionGroupDocNode);
            }

            newPeptideGroupDocNode = (PeptideGroupDocNode)newPeptideGroupDocNode.Add(newPeptideDocNode);
        }

        newdoc = (SrmDocument)newdoc.Add(newPeptideGroupDocNode);
    }

    // No retention time prediction for small molecules (yet?)
    var predictionNoRt = newdoc.Settings.PeptideSettings.Prediction.ChangeRetentionTime(null);
    var peptideSettingsNoRt = newdoc.Settings.PeptideSettings.ChangePrediction(predictionNoRt);
    newdoc = newdoc.ChangeSettings(newdoc.Settings.ChangePeptideSettings(peptideSettingsNoRt));

    return newdoc;
}