/// <summary>
/// Reads the FASTA file at <paramref name="fastaPath"/>, materializes the imported
/// peptide groups into a list, and adds them to the document.
/// </summary>
/// <param name="document">The document to import into</param>
/// <param name="fastaPath">Path of the FASTA file to read</param>
/// <param name="monitor">Progress monitor handed to the importer</param>
/// <param name="to">Insertion path handed to <c>AddPeptideGroups</c></param>
/// <param name="firstAdded">Set by <c>AddPeptideGroups</c></param>
/// <param name="nextAdd">Set by <c>AddPeptideGroups</c></param>
/// <param name="peptideGroupsNew">The list of peptide groups produced by the importer</param>
/// <returns>The document returned by <c>AddPeptideGroups</c></returns>
public static SrmDocument ImportFasta(SrmDocument document, string fastaPath, IProgressMonitor monitor, IdentityPath to, out IdentityPath firstAdded, out IdentityPath nextAdd, out List<PeptideGroupDocNode> peptideGroupsNew)
{
    var fastaImporter = new FastaImporter(document, false);
    using (TextReader fastaReader = File.OpenText(fastaPath))
    {
        // The line count is passed to the importer together with the monitor
        var lineCount = Helpers.CountLinesInFile(fastaPath);
        var importedGroups = fastaImporter.Import(fastaReader, monitor, lineCount);

        // Materialize while the reader is still open, and expose the list to the caller
        peptideGroupsNew = importedGroups.ToList();
        return document.AddPeptideGroups(peptideGroupsNew, false, to, out firstAdded, out nextAdd);
    }
}
/// <summary>
/// Reads the FASTA file at <paramref name="fastaPath"/> and adds the imported peptide
/// groups to the document, reporting how many empty peptide groups the importer counted.
/// </summary>
/// <param name="document">The document to import into</param>
/// <param name="fastaPath">Path of the FASTA file to read</param>
/// <param name="monitor">Progress monitor handed to the importer</param>
/// <param name="to">Insertion path handed to <c>AddPeptideGroups</c>; may be null</param>
/// <param name="firstAdded">Set by <c>AddPeptideGroups</c></param>
/// <param name="nextAdd">Set by <c>AddPeptideGroups</c></param>
/// <param name="emptyProteinCount">The importer's <c>EmptyPeptideGroupCount</c> after the import</param>
/// <returns>The document returned by <c>AddPeptideGroups</c></returns>
public static SrmDocument ImportFasta(SrmDocument document, string fastaPath, IProgressMonitor monitor, IdentityPath to, out IdentityPath firstAdded, out IdentityPath nextAdd, out int emptyProteinCount)
{
    var importer = new FastaImporter(document, false);
    using (TextReader reader = File.OpenText(fastaPath))
    {
        // Honor the caller's insertion path. Previously null was hard-coded here, so
        // the "to" argument was silently ignored. Callers passing null are unaffected.
        document = document.AddPeptideGroups(
            importer.Import(reader, monitor, Helpers.CountLinesInFile(fastaPath)),
            false, to, out firstAdded, out nextAdd);
    }
    // Read the count only after AddPeptideGroups has consumed the import results
    emptyProteinCount = importer.EmptyPeptideGroupCount;
    return document;
}
/// <summary>
/// Loads the same two Thermo replicates into a "mixed" and an "unmixed" document and
/// verifies that the mixed document, once its children are replaced by the unmixed
/// document's peptide groups, is a clone of the unmixed document with results.
/// </summary>
public void ThermoMixedPeptidesTest()
{
    var testFilesDir = new TestFilesDir(TestContext, ZIP_FILE);

    // Initialize both documents, removing any chromatogram cache beside each one.
    // Note that pathDoc ends up holding the path reported for the unmixed document.
    string pathDoc;
    SrmDocument mixedDoc = InitMixedDocument(testFilesDir, out pathDoc);
    FileEx.SafeDelete(Path.ChangeExtension(pathDoc, ChromatogramCache.EXT));
    SrmDocument unmixedDoc = InitUnmixedDocument(testFilesDir, out pathDoc);
    FileEx.SafeDelete(Path.ChangeExtension(pathDoc, ChromatogramCache.EXT));

    // Two replicates, one raw file each
    string rawExtension = ExtensionTestContext.ExtThermoRaw;
    var replicates = new List<ChromatogramSet>();
    var rep03File = MsDataFileUri.Parse(testFilesDir.GetTestPath("Site20_STUDY9P_PHASEII_QC_03" + rawExtension));
    replicates.Add(new ChromatogramSet("rep03", new[] { rep03File }));
    var rep05File = MsDataFileUri.Parse(testFilesDir.GetTestPath("Site20_STUDY9P_PHASEII_QC_05" + rawExtension));
    replicates.Add(new ChromatogramSet("rep05", new[] { rep05File }));

    // Load results into the mixed document
    var mixedWithResults = mixedDoc.ChangeMeasuredResults(new MeasuredResults(replicates));
    var mixedContainer = new ResultsTestDocumentContainer(mixedDoc, pathDoc);
    Assert.IsTrue(mixedContainer.SetDocument(mixedWithResults, mixedDoc, true));
    mixedContainer.AssertComplete();
    mixedDoc = mixedContainer.Document;

    // Swap the mixed document's children for the unmixed peptide groups
    SrmDocument mixedAsUnmixed = (SrmDocument)mixedDoc.ChangeChildren(new DocNode[0]);
    IdentityPath firstAdded;
    IdentityPath nextAdd;
    mixedAsUnmixed = mixedAsUnmixed.AddPeptideGroups(unmixedDoc.PeptideGroups, true, IdentityPath.ROOT,
        out firstAdded, out nextAdd);

    // Load results into the unmixed document
    var unmixedWithResults = unmixedDoc.ChangeMeasuredResults(new MeasuredResults(replicates));
    var unmixedContainer = new ResultsTestDocumentContainer(unmixedDoc, pathDoc);
    Assert.IsTrue(unmixedContainer.SetDocument(unmixedWithResults, unmixedDoc, true));
    unmixedContainer.AssertComplete();
    unmixedDoc = unmixedContainer.Document;

    AssertEx.DocumentCloned(mixedAsUnmixed, unmixedDoc);

    mixedContainer.Release();
    unmixedContainer.Release();
}
/// <summary>
/// Validates the text in <c>TbxFasta</c> line by line and, if it passes, imports it
/// into the document with a <c>FastaImporter</c>.
/// </summary>
/// <param name="document">The document to add to</param>
/// <param name="selectedPath">On input, the insertion path handed to <c>AddPeptideGroups</c>;
/// on a successful import, set to the first added path</param>
/// <param name="emptyPeptideGroups">The importer's <c>EmptyPeptideGroupCount</c> after a
/// successful import, otherwise 0</param>
/// <returns>The input document when the text is empty, the new document on success,
/// or null after an error has been reported through <c>ShowFastaError</c></returns>
public SrmDocument AddFasta(SrmDocument document, ref IdentityPath selectedPath, out int emptyPeptideGroups)
{
    emptyPeptideGroups = 0;

    var fastaText = TbxFasta.Text;
    if (fastaText.Length == 0)
        return document;

    if (!fastaText.StartsWith(">")) // Not L10N
    {
        ShowFastaError(new PasteError
        {
            Message = Resources.ImportFastaHelper_AddFasta_This_must_start_with____,
            Column = 0,
            Length = 1,
            Line = 0,
        });
        return null;
    }

    string[] lines = fastaText.Split('\n');
    int headerLineIndex = -1;   // index of the most recent ">" line
    int residueCount = 0;       // amino acids seen since that line
    for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
    {
        string currentLine = lines[lineIndex];

        if (!currentLine.StartsWith(">")) // Not L10N
        {
            // Sequence line: only amino acid characters, whitespace and '*' are allowed
            for (int charIndex = 0; charIndex < currentLine.Length; charIndex++)
            {
                char residue = currentLine[charIndex];
                if (AminoAcid.IsExAA(residue))
                {
                    residueCount++;
                    continue;
                }
                if (Char.IsWhiteSpace(residue) || residue == '*')
                    continue;

                ShowFastaError(new PasteError
                {
                    Message = String.Format(Resources.ImportFastaHelper_AddFasta___0___is_not_a_capital_letter_that_corresponds_to_an_amino_acid_, residue),
                    Column = charIndex,
                    Line = lineIndex,
                    Length = 1,
                });
                return null;
            }
            continue;
        }

        // Header line: it must contain more than the ">" itself
        if (currentLine.Trim().Length == 1)
        {
            ShowFastaError(new PasteError
            {
                Message = Resources.ImportFastaHelper_AddFasta_There_is_no_name_for_this_protein,
                Column = 0,
                Line = lineIndex,
                Length = 1
            });
            return null;
        }

        // Check the sequence that just ended before starting the next one
        if (!CheckSequence(residueCount, headerLineIndex, lines))
            return null;
        headerLineIndex = lineIndex;
        residueCount = 0;
    }

    // Check the final sequence
    if (!CheckSequence(residueCount, headerLineIndex, lines))
        return null;

    var fastaImporter = new FastaImporter(document, false);
    try
    {
        var textReader = new StringReader(TbxFasta.Text);
        IdentityPath firstAdded, nextAdd;
        // TODO: support long-wait broker
        var importedGroups = fastaImporter.Import(textReader, null, -1);
        document = document.AddPeptideGroups(importedGroups, false, selectedPath, out firstAdded, out nextAdd);
        emptyPeptideGroups = fastaImporter.EmptyPeptideGroupCount;
        selectedPath = firstAdded;
        return document;
    }
    catch (Exception exception)
    {
        ShowFastaError(new PasteError
        {
            Message = Resources.ImportFastaHelper_AddFasta_An_unexpected_error_occurred__ + exception.Message + " (" + exception.GetType() + ")" // Not L10N
        });
        return null;
    }
}
/// <summary>
/// Adds the contents of the transition list grid to the document.
/// When <c>IsMolecule</c> is set, each grid row is merged into the existing
/// molecule group / molecule / transition group tree or starts a new branch of it.
/// Otherwise the rows are checked, serialized to delimited text, imported with a
/// <c>MassListImporter</c>, and the resulting peptide groups are merged or inserted.
/// </summary>
/// <param name="document">The document to add to</param>
/// <param name="selectedPath">Insertion point for new peptide groups in the non-molecule
/// case; updated to the path of each newly inserted peptide group</param>
/// <returns>The input document when the transition list tab is not selected or there is
/// nothing to import, the modified document on success, or null after an error has been
/// reported through <c>ShowTransitionError</c> (or by a helper that returned null)</returns>
private SrmDocument AddTransitionList(SrmDocument document, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPageTransitionList)
        return document;

    if (IsMolecule)
    {
        // Save the current column order to settings
        var active = new List<string>();
        for (int order = 0; order < gridViewTransitionList.Columns.Count; order++)
        {
            // Find the column displayed at this position; only visible ones are recorded
            for (int gridcol = 0; gridcol < gridViewTransitionList.Columns.Count; gridcol++)
            {
                var dataGridViewColumn = gridViewTransitionList.Columns[gridcol];
                if (dataGridViewColumn.DisplayIndex == order)
                {
                    if (dataGridViewColumn.Visible)
                        active.Add(dataGridViewColumn.Name);
                    break;
                }
            }
        }
        Settings.Default.CustomMoleculeTransitionInsertColumnsList = active;

        // We will accept a completely empty product list as meaning
        // "these are all precursor transitions"
        var requireProductInfo = false;
        // The last grid row is excluded (RowCount - 1)
        for (var i = 0; i < gridViewTransitionList.RowCount - 1; i++)
        {
            var row = gridViewTransitionList.Rows[i];
            var productMz = row.Cells[INDEX_PRODUCT_MZ].Value;
            var productFormula = row.Cells[INDEX_PRODUCT_FORMULA].Value;
            var productCharge = row.Cells[INDEX_PRODUCT_CHARGE].Value;
            if ((productMz != null && productMz.ToString().Length > 0) ||
                (productFormula != null && productFormula.ToString().Length > 0) ||
                (productCharge != null && productCharge.ToString().Length > 0))
            {
                requireProductInfo = true; // Product list is not completely empty
                break;
            }
        }

        // For each row in the grid, add to or begin MoleculeGroup|Molecule|TransitionList tree
        for (int i = 0; i < gridViewTransitionList.RowCount - 1; i++)
        {
            DataGridViewRow row = gridViewTransitionList.Rows[i];
            var precursor = ReadPrecursorOrProductColumns(document, row, true); // Get molecule values
            if (precursor == null)
                return null;
            if (requireProductInfo && ReadPrecursorOrProductColumns(document, row, false) == null)
            {
                return null;
            }
            var charge = precursor.Charge;
            var precursorMonoMz = BioMassCalc.CalculateIonMz(precursor.MonoMass, charge);
            var precursorAverageMz = BioMassCalc.CalculateIonMz(precursor.AverageMass, charge);

            // Preexisting molecule group?
            // Each loop below breaks right after the document is changed, so the
            // collections being enumerated come from the document as it was when
            // the loop started.
            bool pepGroupFound = false;
            foreach (var pepGroup in document.MoleculeGroups)
            {
                var pathPepGroup = new IdentityPath(pepGroup.Id);
                if (Equals(pepGroup.Name, Convert.ToString(row.Cells[INDEX_MOLECULE_GROUP].Value)))
                {
                    // Found a molecule group with the same name - can we find an existing transition group to which we can add a transition?
                    pepGroupFound = true;
                    bool pepFound = false;
                    foreach (var pep in pepGroup.SmallMolecules)
                    {
                        var pepPath = new IdentityPath(pathPepGroup, pep.Id);
                        var ionMonoMz = BioMassCalc.CalculateIonMz(pep.CustomIon.MonoisotopicMass, charge);
                        var ionAverageMz = BioMassCalc.CalculateIonMz(pep.CustomIon.AverageMass, charge);
                        // Match existing molecule if same name (if any) and same formula (if any) and similar m/z at the precursor charge
                        // (we don't just check mass since we don't have a tolerance value for that)
                        // Or same name (if any) and identical formula when stripped of labels
                        // Or same name, no formula, and different isotope labels
                        if (Equals(pep.CustomIon.Name, precursor.Name) &&
                            ((Equals(pep.CustomIon.Formula, precursor.Formula) &&
                              Math.Abs(ionMonoMz - precursorMonoMz) <= document.Settings.TransitionSettings.Instrument.MzMatchTolerance &&
                              Math.Abs(ionAverageMz - precursorAverageMz) <= document.Settings.TransitionSettings.Instrument.MzMatchTolerance) ||
                             (!Equals(pep.CustomIon.Formula, precursor.Formula) &&
                              Equals(pep.CustomIon.UnlabeledFormula, BioMassCalc.MONOISOTOPIC.StripLabelsFromFormula(precursor.Formula))) ||
                             (string.IsNullOrEmpty(pep.CustomIon.Formula) && string.IsNullOrEmpty(precursor.Formula) &&
                              !pep.TransitionGroups.Any(t => Equals(t.TransitionGroup.LabelType, precursor.IsotopeLabelType ?? IsotopeLabelType.light)))))
                        {
                            pepFound = true;
                            bool tranGroupFound = false;
                            foreach (var tranGroup in pep.TransitionGroups)
                            {
                                var pathGroup = new IdentityPath(pepPath, tranGroup.Id);
                                if (Math.Abs(tranGroup.PrecursorMz - precursor.Mz) <= document.Settings.TransitionSettings.Instrument.MzMatchTolerance)
                                {
                                    tranGroupFound = true;
                                    var tranFound = false;
                                    try
                                    {
                                        var tranNode = GetMoleculeTransition(document, row, pep.Peptide, tranGroup.TransitionGroup, requireProductInfo);
                                        if (tranNode == null)
                                            return null;
                                        // Skip the add if a transition with an equal custom ion already exists
                                        foreach (var tran in tranGroup.Transitions)
                                        {
                                            if (Equals(tranNode.Transition.CustomIon, tran.Transition.CustomIon))
                                            {
                                                tranFound = true;
                                                break;
                                            }
                                        }
                                        if (!tranFound)
                                        {
                                            document = (SrmDocument) document.Add(pathGroup, tranNode);
                                        }
                                    }
                                    catch (InvalidDataException e)
                                    {
                                        // Some error we didn't catch in the basic checks
                                        ShowTransitionError(new PasteError
                                        {
                                            Column = 0,
                                            Line = row.Index,
                                            Message = e.Message
                                        });
                                        return null;
                                    }
                                    break;
                                }
                            }
                            if (!tranGroupFound)
                            {
                                var node = GetMoleculeTransitionGroup(document, row, pep.Peptide, requireProductInfo);
                                if (node == null)
                                    return null;
                                document = (SrmDocument) document.Add(pepPath, node);
                            }
                            break;
                        }
                    }
                    if (!pepFound)
                    {
                        var node = GetMoleculePeptide(document, row, pepGroup.PeptideGroup, requireProductInfo);
                        if (node == null)
                            return null;
                        document = (SrmDocument) document.Add(pathPepGroup, node);
                    }
                    break;
                }
            }
            if (!pepGroupFound)
            {
                var node = GetMoleculePeptideGroup(document, row, requireProductInfo);
                if (node == null)
                    return null;
                IdentityPath first;
                IdentityPath next;
                document = document.AddPeptideGroups(new[] {node}, false, null, out first, out next);
            }
        }
    }
    else
    {
        var backgroundProteome = GetBackgroundProteome(document);
        var sbTransitionList = new StringBuilder();
        var dictNameSeq = new Dictionary<string, FastaSequence>();
        // Add all existing FASTA sequences in the document to the name to seq dictionary
        // Including named peptide lists would cause the import code to give matching names
        // in this list new names (e.g. with 1, 2, 3 appended). In this code, the names
        // are intended to be merged.
        foreach (var nodePepGroup in document.Children.Cast<PeptideGroupDocNode>().Where(n => !n.IsPeptideList))
        {
            if (!dictNameSeq.ContainsKey(nodePepGroup.Name))
                dictNameSeq.Add(nodePepGroup.Name, (FastaSequence) nodePepGroup.PeptideGroup);
        }

        // Check for simple errors and build strings for import
        for (int i = 0; i < gridViewTransitionList.Rows.Count; i++)
        {
            var row = gridViewTransitionList.Rows[i];
            var peptideSequence = Convert.ToString(row.Cells[colTransitionPeptide.Index].Value);
            var proteinName = Convert.ToString(row.Cells[colTransitionProteinName.Index].Value);
            var precursorMzText = Convert.ToString(row.Cells[colTransitionPrecursorMz.Index].Value);
            var productMzText = Convert.ToString(row.Cells[colTransitionProductMz.Index].Value);
            // Rows with neither a peptide nor a protein name are skipped
            if (string.IsNullOrEmpty(peptideSequence) && string.IsNullOrEmpty(proteinName))
            {
                continue;
            }
            if (string.IsNullOrEmpty(peptideSequence))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPeptide.Index,
                    Line = i,
                    Message = Resources.PasteDlg_ListPeptideSequences_The_peptide_sequence_cannot_be_blank
                });
                return null;
            }
            if (!FastaSequence.IsExSequence(peptideSequence))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPeptide.Index,
                    Line = i,
                    Message = Resources.PasteDlg_ListPeptideSequences_This_peptide_sequence_contains_invalid_characters
                });
                return null;
            }
            // The parsed value is not used; only whether the text parses matters here
            double mz;
            if (!double.TryParse(precursorMzText, out mz))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPrecursorMz.Index,
                    Line = i,
                    Message = Resources.PasteDlg_AddTransitionList_The_precursor_m_z_must_be_a_number_
                });
                return null;
            }
            if (!double.TryParse(productMzText, out mz))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionProductMz.Index,
                    Line = i,
                    Message = Resources.PasteDlg_AddTransitionList_The_product_m_z_must_be_a_number_
                });
                return null;
            }

            const char sep = TRANSITION_LIST_SEPARATOR;
            // Add columns in order specified by TRANSITION_LIST_COL_INDICES
            sbTransitionList
                .Append(proteinName).Append(sep)
                .Append(peptideSequence).Append(sep)
                .Append(precursorMzText).Append(sep)
                .Append(productMzText).AppendLine();

            // Build FASTA sequence text in cases where it is known
            if (!dictNameSeq.ContainsKey(proteinName))
            {
                var fastaSeq = backgroundProteome.GetFastaSequence(proteinName);
                if (fastaSeq != null)
                    dictNameSeq.Add(proteinName, fastaSeq);
            }
        }

        if (sbTransitionList.Length == 0)
            return document;

        // Do the actual import into PeptideGroupDocNodes
        IEnumerable<PeptideGroupDocNode> peptideGroupDocNodes;
        try
        {
            List<TransitionImportErrorInfo> errorList;
            List<MeasuredRetentionTime> irtPeptides;
            List<SpectrumMzInfo> librarySpectra;
            var inputs = new MassListInputs(sbTransitionList.ToString(), LocalizationHelper.CurrentCulture, TRANSITION_LIST_SEPARATOR);
            var importer = new MassListImporter(document, inputs);
            // TODO: support long-wait broker
            peptideGroupDocNodes = importer.Import(null, TRANSITION_LIST_COL_INDICES, dictNameSeq, out irtPeptides, out librarySpectra, out errorList);
            if (errorList.Any())
            {
                // Only the first error is reported; rethrow it so the handlers below display it
                var firstError = errorList[0];
                if (firstError.Row.HasValue)
                {
                    throw new LineColNumberedIoException(firstError.ErrorMessage, firstError.Row.Value, firstError.Column ?? -1);
                }
                else
                {
                    throw new InvalidDataException(firstError.ErrorMessage);
                }
            }
        }
        catch (LineColNumberedIoException x)
        {
            // Grid columns in the same order the text columns were written above
            // (protein, peptide, precursor m/z, product m/z). The peptide entry must be
            // this grid's colTransitionPeptide, which is the column read and reported
            // everywhere else in this method.
            var columns = new[]
            {
                colTransitionProteinName,
                colTransitionPeptide,
                colTransitionPrecursorMz,
                colTransitionProductMz
            };
            ShowTransitionError(new PasteError
            {
                Column = x.ColumnIndex >= 0 ? columns[x.ColumnIndex].Index : 0,
                Line = (int) x.LineNumber - 1,
                Message = x.PlainMessage
            });
            return null;
        }
        catch (InvalidDataException x)
        {
            ShowTransitionError(new PasteError
            {
                Message = x.Message
            });
            return null;
        }

        // Insert the resulting nodes into the document tree, merging when possible
        bool after = false;
        foreach (var nodePepGroup in peptideGroupDocNodes)
        {
            PeptideGroupDocNode nodePepGroupExist = FindPeptideGroupDocNode(document, nodePepGroup);
            if (nodePepGroupExist != null)
            {
                var nodePepGroupNew = nodePepGroupExist.Merge(nodePepGroup);
                if (!ReferenceEquals(nodePepGroupExist, nodePepGroupNew))
                    document = (SrmDocument) document.ReplaceChild(nodePepGroupNew);
            }
            else
            {
                // Add to the end, if no insert node
                var to = selectedPath;
                if (to == null || to.Depth < (int) SrmDocument.Level.MoleculeGroups)
                    document = (SrmDocument) document.Add(nodePepGroup);
                else
                {
                    Identity toId = selectedPath.GetIdentity((int) SrmDocument.Level.MoleculeGroups);
                    document = (SrmDocument) document.Insert(toId, nodePepGroup, after);
                }
                selectedPath = new IdentityPath(nodePepGroup.Id);
                // All future insertions should be after, to avoid reversing the list
                after = true;
            }
        }
    }
    return document;
}
/// <summary>
/// Exercises <c>MATCHER</c> against a series of modified sequence strings: inputs that
/// must fail to match, matching by name and mass, precedence between document, global
/// and UniMod modifications, variable modifications, UniMod labels, multiple label
/// types, and finally library-key matching with <c>LibKeyModificationMatcher</c>.
/// The statements depend on matcher state set by the preceding <c>UpdateMatcher</c>
/// calls, so their order matters.
/// </summary>
public void TestModificationMatcher()
{
    InitSeqs();

    var carbC = StaticModList.GetDefaultsOn()[0];

    // Test exception thrown if unable to match - mass.
    UpdateMatcherFail(STR_FAIL_MASS);
    UpdateMatcherFail(STR_FAIL_NOT_A_NUMBER);
    // Test exception thrown if unable to match - name.
    UpdateMatcherFail(STR_FAIL_NAME);
    // Can't match empty modifications.
    UpdateMatcherFail(STR_FAIL_EMPTY_MOD);
    UpdateMatcherFail(STR_FAIL_EMPTY_MOD2);
    // Can't match double modifications.
    UpdateMatcherFail(STR_FAIL_DOUBLE_MOD);
    // Test exception thrown if unimod not specified correctly
    UpdateMatcherFail(STR_FAIL_UNIMOD);
    UpdateMatcherFail(STR_UNKNOWN_UNIMOD);
    // Can't phosphorylate tryptophan
    UpdateMatcherFail(STR_FAIL_WRONG_AA_UNIMOD);
    // Can't put C-terminal modification in middle of peptide
    UpdateMatcherFail(STR_FAIL_UNIMOD_TERMINUS);

    // Test mods in UniMod match correctly.
    UpdateMatcher(StaticModList.GetDefaultsOn(), HeavyModList.GetDefaultsOn(), null, null);
    // A sequence with no modifications should not be explicitly modified.
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_NO_MODS).HasExplicitMods);
    var nodeCysOxi = MATCHER.GetModifiedNode(STR_CYS_AND_OXI);
    Assert.IsTrue(nodeCysOxi.HasExplicitMods);
    Assert.IsFalse(nodeCysOxi.ExplicitMods.HasHeavyModifications);
    // Modifications should match by name.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_MOD_BY_NAME).ExplicitMods.StaticModifications.Contains(mod =>
        Equals(mod.Modification.Name, "Phospho (ST)")));
    // Test can find terminal modification
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_TERM_ONLY).ExplicitMods.HeavyModifications.Contains(mod =>
        mod.Modification.EquivalentAll(UniMod.GetModification("Label:13C(6) (C-term R)", false))));
    // Test can find matches on terminus that are not terminal
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_MOD_BY_NAME).ExplicitMods.StaticModifications.Contains(mod =>
        mod.Modification.Terminus == null));
    // Test matching negative masses
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_AMMONIA_LOSS).ExplicitMods.StaticModifications.Contains(mod =>
        mod.Modification.EquivalentAll(UniMod.GetModification("Ammonia-loss (N-term C)", true))));

    // General and specific
    // If all AAs modified, try for most general modification.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15)
        .ExplicitMods.HeavyModifications.Contains(mod => mod.Modification.Equivalent(LABEL15_N)));

    // Updating the settings.
    // Peptide settings should change to include new mods.
    var docNew = new SrmDocument(SrmSettingsList.GetDefault());
    IdentityPath firstAdded;
    IdentityPath nextAdded;
    docNew = docNew.AddPeptideGroups(new[]
    {
        new PeptideGroupDocNode(new PeptideGroup(), "PepGroup1", "",
            new[] { MATCHER.GetModifiedNode(STR_MOD_BY_NAME) })
    }, true, null, out firstAdded, out nextAdded);
    var pepSetNew = MATCHER.GetDocModifications(docNew);
    Assert.IsTrue(pepSetNew.StaticModifications.Contains(UniMod.GetModification("Phospho (ST)", true).ChangeExplicit(true)));
    // Update the document to the new settings.
    var pepSetNew1 = pepSetNew;
    var settingsNew2 = docNew.Settings.ChangePeptideModifications(mods => pepSetNew1);
    var lightGlobalMods = new MappedList<string, StaticMod>();
    lightGlobalMods.AddRange(settingsNew2.PeptideSettings.Modifications.StaticModifications);
    var heavyGlobalMods = new MappedList<string, StaticMod>();
    heavyGlobalMods.AddRange(settingsNew2.PeptideSettings.Modifications.AllHeavyModifications);
    // Match again. Test FoundMatches string should now be empty.
    MATCHER.CreateMatches(docNew.Settings.ChangePeptideModifications(mods => pepSetNew1),
        new List<string> { STR_MOD_BY_NAME }, lightGlobalMods, heavyGlobalMods);
    Assert.IsTrue(string.IsNullOrEmpty(MATCHER.FoundMatches));

    // Adding 15N to the settings.
    UpdateMatcher(new[] { carbC }, new[] { LABEL15_N }, null, null);
    // Test sequences with only explicit heavy mods should not have explicit light mods
    Assert.IsNull(MATCHER.GetModifiedNode(STR_HEAVY_ONLY).ExplicitMods.StaticModifications);
    // Test sequences with only explicit light mods should not have explicit heavy mods
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_LIGHT_ONLY).ExplicitMods.HasHeavyModifications);

    // Test global mods take precedence over UniMod
    UpdateMatcher(new[] { carbC }, null, new[] { OXIDATION_M_GLOBAL }, new[] { LABEL15_N });
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).ExplicitMods.StaticModifications
        .Contains(mod => Equals(mod.Modification, OXIDATION_M_GLOBAL)));

    // Test document mods take precedence over UniMod
    UpdateMatcher(new[] { carbC, METHIONINE_OXIDATION }, null, new[] { OXIDATION_M_GLOBAL }, new[] { LABEL15_N });
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).HasExplicitMods);

    // Test exception thrown if match doesn't make sense - wrong AA.
    UpdateMatcherFail(STR_FAIL_OX_ON_D);
    // Test exception thrown if match doesn't make sense - wrong terminus.
    _seqs.Add(STR_FAIL_OX_TERM);
    AssertEx.ThrowsException<FormatException>(() => UpdateMatcher(new[] { OXIDATION_M_C_TERM }, null, null, null));
    _seqs.Remove(STR_FAIL_OX_TERM);

    // Heavy 15N - All AAs.
    UpdateMatcher(new[] { carbC, METHIONINE_OXIDATION }, new[] { LABEL15_N }, null, null);
    // Node should be created from document settings if possible.
    Assert.IsNull(MATCHER.GetModifiedNode(STR_HEAVY_15).ExplicitMods);
    // Heavy 15N - specific AA.
    // If only a specific AA is modified, there must be an explicit mod.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_F).HasExplicitMods);

    // Test variable mods match correctly.
    // Put variable mod in global mod and not on doc - make sure don't get variable mod,
    // should get explicit mod in that case.
    var variableMetOx = METHIONINE_OXIDATION.ChangeVariable(true);
    UpdateMatcher(new[] { carbC }, null, new[] { variableMetOx }, null);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).HasExplicitMods);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS).ExplicitMods.IsVariableStaticMods);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS_CAP).ExplicitMods.IsVariableStaticMods);
    // Add variable mod to doc
    UpdateMatcher(new[] { carbC, variableMetOx }, null, null, null);
    // Mod can be created by the settings.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).HasExplicitMods);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).ExplicitMods.IsVariableStaticMods);
    // Mod cannot be created by the settings.
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS).ExplicitMods.IsVariableStaticMods);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS_CAP).ExplicitMods.IsVariableStaticMods);

    // Add Met Ox to global. Test: +16 finds it.
    UpdateMatcher(new[] { carbC }, null, new[] { MET_OX_ROUNDED }, null);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, MET_OX_ROUNDED)));
    // Test: +15.99 finds UniMod.
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_HEAVY_15).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, MET_OX_ROUNDED)));
    // Add Methionine Oxidation before Met Ox. Test: +16 finds it.
    UpdateMatcher(new[] { carbC }, null, new[] { METHIONINE_OXIDATION, MET_OX_ROUNDED }, null);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, MET_OX_ROUNDED)));
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, METHIONINE_OXIDATION)));
    // Test long masses rounded.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_METOX_LONG_MASS).ExplicitMods.StaticModifications.Contains(mod =>
        Equals(mod.Modification, METHIONINE_OXIDATION)));

    // Test UniMod label types
    var node = MATCHER.GetModifiedNode(STR_UNIMOD_LABEL);
    Assert.IsNotNull(node);
    Assert.IsNull(node.ExplicitMods.StaticModifications);
    Assert.IsTrue(node.ExplicitMods.HeavyModifications.Contains(mod => Equals(mod.Modification, N_TERM_LABEL)));
    UpdateMatcherWithNoSequences(new[] { carbC }, new[] { N_TERM_LABEL }, new[] { METHIONINE_OXIDATION, MET_OX_ROUNDED }, null);
    var nodeNew = MATCHER.GetModifiedNode(STR_UNIMOD_LABEL);
    Assert.IsNotNull(nodeNew);
    Assert.IsTrue(nodeNew.TransitionGroups.Any(group => Equals(group.TransitionGroup.LabelType, IsotopeLabelType.heavy)));
    UpdateMatcher(new[] { carbC }, null, new[] { METHIONINE_OXIDATION, MET_OX_ROUNDED }, null);

    // Test case where there are lots of unimod labels
    // (Assert.AreEqual takes the expected value first, so failures report correctly)
    var nodeUniAll = MATCHER.GetModifiedNode(STR_UNIMOD_ALL);
    Assert.AreEqual(10, nodeUniAll.ExplicitMods.HeavyModifications.Count);
    Assert.IsNull(nodeUniAll.ExplicitMods.StaticModifications);
    foreach (var mod in nodeUniAll.ExplicitMods.HeavyModifications)
    {
        Assert.AreEqual("+01", mod.Modification.ShortName);
        Assert.AreEqual(994, mod.Modification.UnimodId);
    }

    // Test unimod terminal label
    var nodeUniTerm = MATCHER.GetModifiedNode(STR_UNIMOD_TERMINUS);
    Assert.AreEqual(1, nodeUniTerm.ExplicitMods.HeavyModifications.Count);
    Assert.IsNull(nodeUniTerm.ExplicitMods.StaticModifications);
    Assert.AreEqual(ModTerminus.C, nodeUniTerm.ExplicitMods.HeavyModifications[0].Modification.Terminus);
    Assert.AreEqual(298, nodeUniTerm.ExplicitMods.HeavyModifications[0].Modification.UnimodId);

    // Basic multi-label test
    var heavyLabelType2 = new IsotopeLabelType("Heavy2", 1);
    var typedMod = new TypedModifications(heavyLabelType2, new List<StaticMod> { LABEL15_N });
    var peptideMods = new PeptideModifications(new List<StaticMod>(), new List<TypedModifications> { typedMod });
    var settingsMultiLabel = SrmSettingsList.GetDefault().ChangePeptideModifications(mods => peptideMods);
    var defSetSetLight = new MappedList<string, StaticMod>();
    defSetSetLight.AddRange(StaticModList.GetDefaultsOn());
    var defSetHeavy = new MappedList<string, StaticMod>();
    defSetHeavy.AddRange(HeavyModList.GetDefaultsOn());
    defSetHeavy.Add(LABEL15_N);
    MATCHER.CreateMatches(settingsMultiLabel, new List<string> { STR_HEAVY_15_F }, defSetSetLight, defSetHeavy);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_F).ExplicitMods.GetHeavyModifications().Contains(mod =>
        Equals(mod.LabelType, heavyLabelType2)));
    // Peptide settings should not change.
    var docNew0 = new SrmDocument(settingsMultiLabel).AddPeptideGroups(new[]
    {
        new PeptideGroupDocNode(new PeptideGroup(), "PepGroup1", "",
            new[] { MATCHER.GetModifiedNode(STR_HEAVY_15_F) })
    }, true, null, out firstAdded, out nextAdded);
    var settingsNew = MATCHER.GetDocModifications(docNew0);
    Assert.AreEqual(settingsMultiLabel.PeptideSettings.Modifications.ChangeHasHeavyModifications(false),
        settingsNew.ChangeHasHeavyModifications(false));

    // Finding specific modifications.
    // If only specific AA modified, try for most specific modification.
    UpdateMatcher(null, null, null, null, new[] { STR_HEAVY_15_F });
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_F)
        .ExplicitMods.HeavyModifications.Contains(mod => mod.Modification.AminoAcids.Contains(c => c == 'F')));
    // If only some AAs modified, try for most specific modifications.
    UpdateMatcher(null, null, null, null, new[] { STR_HEAVY_15_NOT_ALL });
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_NOT_ALL)
        .ExplicitMods.HeavyModifications.Contains(mod => mod.Modification.AminoAcids.Contains(c => c == 'I')));

    using (var testDir = new TestFilesDir(TestContext, ZIP_FILE))
    using (var modMatchDocContainer = InitMatchDocContainer(testDir))
    {
        var libkeyModMatcher = new LibKeyModificationMatcher();
        var anlLibSpec = new BiblioSpecLiteSpec("ANL_Combo", testDir.GetTestPath("ANL_Combined.blib"));
        var yeastLibSpec = new BiblioSpecLiteSpec("Yeast", testDir.GetTestPath("Yeast_atlas_small.blib"));
        modMatchDocContainer.ChangeLibSpecs(new[] { anlLibSpec, yeastLibSpec });
        var docLibraries = modMatchDocContainer.Document.Settings.PeptideSettings.Libraries.Libraries;
        int anlLibIndex = docLibraries.IndexOf(library => Equals(library.Name, anlLibSpec.Name));
        int yeastLibIndex = docLibraries.IndexOf(library => Equals(library.Name, yeastLibSpec.Name));

        libkeyModMatcher.CreateMatches(modMatchDocContainer.Document.Settings,
            docLibraries[anlLibIndex].Keys, defSetSetLight, defSetHeavy);

        // Test can match 15N
        Assert.IsTrue(libkeyModMatcher.Matches.Values.Contains(match =>
            match.HeavyMod != null && match.HeavyMod.Equivalent(LABEL15_N)));

        var uniModMetOx = UniMod.GetModification("Oxidation (M)", true);

        // Test can match Met Ox
        Assert.IsTrue(libkeyModMatcher.Matches.Values.Contains(match =>
            match.StructuralMod != null && match.StructuralMod.Equivalent(uniModMetOx)));

        // Test can match 15N and Met ox!
        Assert.IsTrue(libkeyModMatcher.Matches.Contains(match => match.Key.Mass == 17
            && match.Value.StructuralMod != null && match.Value.StructuralMod.Equivalent(uniModMetOx)
            && match.Value.HeavyMod != null && match.Value.HeavyMod.Equivalent(LABEL15_N)));

        // Test can match Cysteine (Implicit) and Met Ox (variable)
        libkeyModMatcher.CreateMatches(modMatchDocContainer.Document.Settings,
            docLibraries[yeastLibIndex].Keys, defSetSetLight, defSetHeavy);
        Assert.IsTrue(libkeyModMatcher.MatcherPepMods.StaticModifications.Contains(mod =>
            mod.Formula.Equals(UniMod.GetModification(StaticModList.DEFAULT_NAME, true).Formula) && !mod.IsVariable));
        Assert.IsTrue(libkeyModMatcher.MatcherPepMods.StaticModifications.Contains(mod =>
            mod.Formula.Equals("O") && mod.IsVariable));
    }
}
/// <summary>
/// Adds the peptide nodes of the given matches to the library peptide group of the
/// document (or the library molecule group, when any match is not proteomic),
/// appending to an existing group of that name or creating a new one.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="listMatches">The matches whose <c>NodePep</c> nodes are to be added</param>
/// <param name="broker">For reporting progress and checking cancellation; may be null</param>
/// <param name="toPath">Path to the location in the document to add a new group</param>
/// <param name="selectedPath">Path that should be selected afterwards; null if the
/// operation was canceled</param>
/// <returns>A new document with the nodes added, or the input document if the operation
/// was canceled or there was nothing to add to a new group</returns>
private static SrmDocument AddPeptidesToLibraryGroup(SrmDocument document, ICollection<PeptideMatch> listMatches, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    // Get starting progress values
    int startPercent = (broker != null ? broker.ProgressValue : 0);
    int processedPercent = 0;
    int processedCount = 0;
    int totalMatches = listMatches.Count;

    var listPeptides = new List<PeptideDocNode>();
    var hasSmallMolecules = false;
    foreach (var match in listMatches)
    {
        // Show progress, if in a long wait
        if (broker != null)
        {
            if (broker.IsCanceled)
            {
                selectedPath = null;
                return document;
            }
            processedCount++;
            // Scale progress into the range remaining after startPercent
            int processPercentNow = processedCount * (100 - startPercent) / totalMatches;
            if (processedPercent != processPercentNow)
            {
                processedPercent = processPercentNow;
                broker.ProgressValue = startPercent + processedPercent;
            }
        }

        listPeptides.Add(match.NodePep.ChangeSettings(document.Settings, SrmSettingsDiff.ALL));
        hasSmallMolecules |= !match.NodePep.IsProteomic;
    }

    bool hasVariable = listPeptides.Contains(nodePep => nodePep.HasExplicitMods && nodePep.ExplicitMods.IsVariableStaticMods);

    // Use existing group by this name, if present.
    var nodeName = hasSmallMolecules
        ? Resources.ViewLibraryPepMatching_AddPeptidesToLibraryGroup_Library_Molecules
        : Resources.ViewLibraryPepMatching_AddPeptidesToLibraryGroup_Library_Peptides;
    var nodePepGroupNew = FindPeptideGroupDocNode(document, nodeName);
    if (nodePepGroupNew != null)
    {
        var newChildren = nodePepGroupNew.Children.ToList();
        newChildren.AddRange(listPeptides.ConvertAll(nodePep => (DocNode)nodePep));
        // Select the peptide itself only when exactly one was added
        selectedPath = (listPeptides.Count == 1
            ? new IdentityPath(nodePepGroupNew.Id, listPeptides[0].Id)
            : toPath);
        nodePepGroupNew = (PeptideGroupDocNode)nodePepGroupNew.ChangeChildren(newChildren);
        if (hasVariable)
        {
            nodePepGroupNew = (PeptideGroupDocNode)nodePepGroupNew.ChangeAutoManageChildren(false);
        }
        return (SrmDocument)document.ReplaceChild(nodePepGroupNew);
    }
    else
    {
        if (listPeptides.Count == 0)
        {
            // Nothing to add: avoid creating an empty group and then indexing its
            // first child below, which would throw. Leave the selection at toPath,
            // as the existing-group branch does when no peptides were added.
            selectedPath = toPath;
            return document;
        }

        nodePepGroupNew = new PeptideGroupDocNode(new PeptideGroup(),
            nodeName, string.Empty, listPeptides.ToArray());
        if (hasVariable)
        {
            nodePepGroupNew = (PeptideGroupDocNode)nodePepGroupNew.ChangeAutoManageChildren(false);
        }
        IdentityPath nextAdd;
        document = document.AddPeptideGroups(new[] { nodePepGroupNew }, true,
            toPath, out selectedPath, out nextAdd);
        // Select the first child of the newly added group
        selectedPath = new IdentityPath(selectedPath, nodePepGroupNew.Children[0].Id);
        return document;
    }
}
/// <summary>
/// Places every peptide match that carries protein information under its protein(s),
/// creating protein nodes that are not yet in the document, and returns the
/// resulting document.
/// </summary>
/// <param name="document">The document to start from</param>
/// <param name="dictCopy">Peptide matches keyed by modified sequence; this method
/// only enumerates its values</param>
/// <param name="broker">Long wait broker used for progress and cancellation; may be null</param>
/// <param name="toPath">Location in the document at which new proteins are inserted</param>
/// <param name="selectedPath">Receives the path that should be selected once the
/// operation has finished</param>
/// <returns>A document containing the matched peptides and their proteins</returns>
private SrmDocument AddProteomePeptides(SrmDocument document, Dictionary<PeptideSequenceModKey, PeptideMatch> dictCopy, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    // Protein nodes created here, keyed by name, waiting to be inserted at the end.
    var pendingGroups = new Dictionary<string, PeptideGroupDocNode>();

    // Progress is spread over whatever part of the range has not been used yet.
    int basePercent = 0;
    if (broker != null)
    {
        basePercent = broker.ProgressValue;
    }
    int lastPercent = 0;
    int countWithProteins = 0;
    int matchTotal = dictCopy.Count;

    // Default selection, in case nothing below replaces it.
    selectedPath = toPath;

    foreach (PeptideMatch pepMatch in dictCopy.Values)
    {
        if (broker != null)
        {
            if (broker.IsCanceled)
            {
                selectedPath = toPath;
                return document;
            }

            // Only matches that have proteins are handled by this loop,
            // so only those advance the progress count.
            if (pepMatch.Proteins != null)
            {
                countWithProteins++;
            }
            int percentNow = countWithProteins * (100 - basePercent) / matchTotal;
            if (percentNow != lastPercent)
            {
                lastPercent = percentNow;
                broker.ProgressValue = basePercent + lastPercent;
            }
        }

        // Matches without proteins are not handled here.
        if (pepMatch.Proteins == null)
        {
            continue;
        }
        // With the NoDuplicates filter, a peptide found in several proteins is left out.
        if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.NoDuplicates &&
            pepMatch.Proteins.Count > 1)
        {
            continue;
        }

        // Children get fresh ids only when the peptide is placed under more than one protein.
        bool needsIdCopies = pepMatch.Proteins != null && pepMatch.Proteins.Count > 1;

        foreach (ProteinInfo protein in pepMatch.Proteins)
        {
            // Locate the protein: first in the document, then among the pending nodes.
            string proteinName = protein.ProteinMetadata.Name;
            var groupNode = FindPeptideGroupDocNode(document, proteinName);
            bool inDocument = groupNode != null;
            bool inPending = false;
            if (!inDocument)
            {
                inPending = proteinName != null && pendingGroups.TryGetValue(proteinName, out groupNode);
                if (!inPending)
                {
                    // Seen for the first time: start a node with no peptides.
                    var alternatives = new List<ProteinMetadata>(protein.Alternatives);
                    var fastaNew = new FastaSequence(proteinName, protein.ProteinMetadata.Description,
                        alternatives, protein.Sequence);
                    groupNode = new PeptideGroupDocNode(fastaNew, null, null, new PeptideDocNode[0]);
                }
            }

            // Build the peptide positioned within this protein's sequence.
            var fastaSeq = groupNode.PeptideGroup as FastaSequence;
            var pepSeq = pepMatch.NodePep.Peptide.Target.Sequence;
            // ReSharper disable PossibleNullReferenceException
            var startIndex = fastaSeq.Sequence.IndexOf(pepSeq, StringComparison.Ordinal);
            // ReSharper restore PossibleNullReferenceException
            var newPeptide = new Peptide(fastaSeq, pepSeq, startIndex, startIndex + pepSeq.Length,
                Settings.PeptideSettings.Enzyme.CountCleavagePoints(pepSeq));

            // Carry the match's transition groups over to the new peptide node.
            var bareNode = new PeptideDocNode(newPeptide, pepMatch.NodePep.ExplicitMods,
                pepMatch.NodePep.ExplicitRetentionTime);
            var carriedGroups = new List<DocNode>();
            foreach (DocNode child in pepMatch.NodePep.Children)
            {
                var nodeTranGroup = (TransitionGroupDocNode)child;
                if (needsIdCopies)
                {
                    // Copy ids so the same transition group does not appear under
                    // two proteins with the same global index.
                    nodeTranGroup = (TransitionGroupDocNode)nodeTranGroup.CopyId();
                    var carriedTransitions = new List<DocNode>();
                    foreach (DocNode nodeTran in nodeTranGroup.Children)
                    {
                        carriedTransitions.Add(nodeTran.CopyId());
                    }
                    nodeTranGroup = (TransitionGroupDocNode)nodeTranGroup.ChangeChildren(carriedTransitions);
                }
                carriedGroups.Add(nodeTranGroup);
            }
            var manualNode = (PeptideDocNode)bareNode.ChangeChildren(carriedGroups).ChangeAutoManageChildren(false);
            var newNodePep = manualNode.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);

            // Skip a peptide the protein already contains.
            if (groupNode.Children.Contains(nodePep => Equals(((PeptideDocNode)nodePep).Key, newNodePep.Key)))
            {
                Console.WriteLine(Resources.ViewLibraryPepMatching_AddProteomePeptides_Skipping__0__already_present, newNodePep.Peptide.Target);
                continue;
            }

            // Insert the peptide and keep the protein's children in sorted order.
            var sortedPeptides = groupNode.Children.Cast<PeptideDocNode>().ToList();
            sortedPeptides.Add(newNodePep);
            sortedPeptides.Sort(FastaSequence.ComparePeptides);
            var updatedGroup = groupNode.ChangeChildren(sortedPeptides.Cast<DocNode>().ToArray())
                .ChangeAutoManageChildren(false);

            if (inDocument)
            {
                // The protein is already part of the document: swap it in place.
                document = (SrmDocument)document.ReplaceChild(updatedGroup);
            }
            else
            {
                // Otherwise record the latest version of the pending node.
                if (inPending)
                {
                    pendingGroups.Remove(groupNode.Name);
                }
                pendingGroups.Add(groupNode.Name, (PeptideGroupDocNode)updatedGroup);
            }

            // When exactly one node is being added, it becomes the selection.
            if (PeptideMatches.Count == 1)
            {
                selectedPath = new IdentityPath(new[] { groupNode.Id, newNodePep.Peptide });
            }

            // With the FirstOccurence filter, stop after the first protein that took the peptide.
            if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
            {
                break;
            }
        }
    }

    if (pendingGroups.Count == 0)
    {
        return document;
    }

    // Sort the peptides within each new protein.
    var groupsToAdd = new List<PeptideGroupDocNode>();
    foreach (PeptideGroupDocNode pending in pendingGroups.Values)
    {
        // The children have to be typed as PeptideDocNodes in order to sort them.
        var peptides = pending.Children.Cast<PeptideDocNode>().ToList();
        peptides.Sort(FastaSequence.ComparePeptides);
        groupsToAdd.Add((PeptideGroupDocNode)pending.ChangeChildren(peptides.Cast<DocNode>().ToArray()));
    }

    // Sort the new proteins by name.
    groupsToAdd.Sort((left, right) => Comparer<string>.Default.Compare(left.Name, right.Name));

    IdentityPath singleSelection = selectedPath;
    IdentityPath nextAdd;
    document = document.AddPeptideGroups(groupsToAdd, false, toPath, out selectedPath, out nextAdd);
    // A single added node keeps the selection chosen inside the loop.
    if (PeptideMatches.Count == 1)
    {
        selectedPath = singleSelection;
    }
    return document;
}
/// <summary>
/// Exercises the modification matcher: sequences that must fail to match, matching by
/// name, mass and UniMod id, the interplay of document, global and UniMod modifications,
/// variable modifications, multiple label types, and matching against library keys.
/// The steps share the state of MATCHER, so their order matters.
/// </summary>
public void TestModificationMatcher()
{
    InitSeqs();

    var carbC = StaticModList.GetDefaultsOn()[0];

    // Test exception thrown if unable to match - mass.
    UpdateMatcherFail(STR_FAIL_MASS);
    UpdateMatcherFail(STR_FAIL_NOT_A_NUMBER);
    // Test exception thrown if unable to match - name.
    UpdateMatcherFail(STR_FAIL_NAME);
    // Can't match empty modifications.
    UpdateMatcherFail(STR_FAIL_EMPTY_MOD);
    UpdateMatcherFail(STR_FAIL_EMPTY_MOD2);
    // Can't match double modifications.
    UpdateMatcherFail(STR_FAIL_DOUBLE_MOD);
    // Test exception thrown if unimod not specified correctly
    UpdateMatcherFail(STR_FAIL_UNIMOD);
    UpdateMatcherFail(STR_UNKNOWN_UNIMOD);
    // Can't phosphorylate tryptophan
    UpdateMatcherFail(STR_FAIL_WRONG_AA_UNIMOD);
    // Can't put C-terminal modification in middle of peptide
    UpdateMatcherFail(STR_FAIL_UNIMOD_TERMINUS);

    // Test mods in UniMod match correctly.
    UpdateMatcher(StaticModList.GetDefaultsOn(), HeavyModList.GetDefaultsOn(), null, null);
    // A sequence with no modifications should not be explicitly modified.
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_NO_MODS).HasExplicitMods);
    var nodeCysOxi = MATCHER.GetModifiedNode(STR_CYS_AND_OXI);
    Assert.IsTrue(nodeCysOxi.HasExplicitMods);
    Assert.IsFalse(nodeCysOxi.ExplicitMods.HasHeavyModifications);
    // Modifications should match by name.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_MOD_BY_NAME).ExplicitMods.StaticModifications.Contains(mod =>
        Equals(mod.Modification.Name, "Phospho (ST)")));
    // Test can find terminal modification
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_TERM_ONLY).ExplicitMods.HeavyModifications.Contains(mod =>
        mod.Modification.EquivalentAll(UniMod.GetModification("Label:13C(6) (C-term R)", false))));
    // Test can find matches on terminus that are not terminal
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_MOD_BY_NAME).ExplicitMods.StaticModifications.Contains(mod =>
        mod.Modification.Terminus == null));
    // Test matching negative masses
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_AMMONIA_LOSS).ExplicitMods.StaticModifications.Contains(mod =>
        mod.Modification.EquivalentAll(UniMod.GetModification("Ammonia-loss (N-term C)", true))));

    // General and specific
    // If all AAs modified, try for most general modification.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15)
        .ExplicitMods.HeavyModifications.Contains(mod => mod.Modification.Equivalent(LABEL15_N)));

    // Updating the settings.
    // Peptide settings should change to include new mods.
    var docNew = new SrmDocument(SrmSettingsList.GetDefault());
    IdentityPath firstAdded;
    IdentityPath nextAdded;
    docNew = docNew.AddPeptideGroups(new[] { new PeptideGroupDocNode(new PeptideGroup(), "PepGroup1", "",
        new[] {MATCHER.GetModifiedNode(STR_MOD_BY_NAME)})}, true, null, out firstAdded, out nextAdded);
    var pepSetNew = MATCHER.GetDocModifications(docNew);
    Assert.IsTrue(pepSetNew.StaticModifications.Contains(UniMod.GetModification("Phospho (ST)", true).ChangeExplicit(true)));
    // Update the document to the new settings.
    var pepSetNew1 = pepSetNew;
    var settingsNew2 = docNew.Settings.ChangePeptideModifications(mods => pepSetNew1);
    var lightGlobalMods = new MappedList<string, StaticMod>();
    lightGlobalMods.AddRange(settingsNew2.PeptideSettings.Modifications.StaticModifications);
    var heavyGlobalMods = new MappedList<string, StaticMod>();
    heavyGlobalMods.AddRange(settingsNew2.PeptideSettings.Modifications.HeavyModifications);
    // Match again. Test FoundMatches string should now be empty.
    MATCHER.CreateMatches(docNew.Settings.ChangePeptideModifications(mods => pepSetNew1),
        new List<string> { STR_MOD_BY_NAME }, lightGlobalMods, heavyGlobalMods);
    Assert.IsTrue(string.IsNullOrEmpty(MATCHER.FoundMatches));

    // Adding 15N to the settings.
    UpdateMatcher(new[] { carbC }, new[] { LABEL15_N }, null, null);
    // Test sequences with only explicit heavy mods should not have explicit light mods
    Assert.IsNull(MATCHER.GetModifiedNode(STR_HEAVY_ONLY).ExplicitMods.StaticModifications);
    // Test sequences with only explicit light mods should not have explicit heavy mods
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_LIGHT_ONLY).ExplicitMods.HasHeavyModifications);

    // Test global mods take precedence over UniMod
    UpdateMatcher(new[] { carbC }, null, new[] { OXIDATION_M_GLOBAL }, new[] { LABEL15_N });
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).ExplicitMods.StaticModifications
        .Contains(mod => Equals(mod.Modification, OXIDATION_M_GLOBAL)));

    // Test document mods take precedence over UniMod
    UpdateMatcher(new[] { carbC, METHIONINE_OXIDATION }, null, new[] { OXIDATION_M_GLOBAL }, new[] { LABEL15_N });
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).HasExplicitMods);

    // Test exception thrown if match doesn't make sense - wrong AA.
    UpdateMatcherFail(STR_FAIL_OX_ON_D);
    // Test exception thrown if match doesn't make sense - wrong terminus.
    _seqs.Add(STR_FAIL_OX_TERM);
    AssertEx.ThrowsException<FormatException>(() => UpdateMatcher(new[] {OXIDATION_M_C_TERM}, null, null, null));
    _seqs.Remove(STR_FAIL_OX_TERM);

    // Heavy 15N - All AAs.
    UpdateMatcher(new[] { carbC, METHIONINE_OXIDATION }, new[] {LABEL15_N}, null, null);
    // Node should be created from document settings if possible.
    Assert.IsNull(MATCHER.GetModifiedNode(STR_HEAVY_15).ExplicitMods);
    // Heavy 15N - specific AA.
    // If only a specific AA is modified, there must be an explicit mod.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_F).HasExplicitMods);

    // Test variable mods match correctly.
    // Put variable mod in global mod and not on doc - make sure don't get variable mod,
    // should get explicit mod in that case.
    var variableMetOx = METHIONINE_OXIDATION.ChangeVariable(true);
    UpdateMatcher(new[] { carbC }, null, new[] { variableMetOx }, null);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).HasExplicitMods);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS).ExplicitMods.IsVariableStaticMods);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS_CAP).ExplicitMods.IsVariableStaticMods);
    // Add variable mod to doc
    UpdateMatcher(new[] { carbC, variableMetOx }, null, null, null);
    // Mod can be created by the settings.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).HasExplicitMods);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).ExplicitMods.IsVariableStaticMods);
    // Mod cannot be created by the settings.
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS).ExplicitMods.IsVariableStaticMods);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_OXI_PHOS_CAP).ExplicitMods.IsVariableStaticMods);

    // Add Met Ox to global. Test: +16 finds it.
    UpdateMatcher(new[] {carbC}, null, new[] {MET_OX_ROUNDED}, null);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, MET_OX_ROUNDED)));
    // Test: +15.99 finds UniMod.
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_HEAVY_15).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, MET_OX_ROUNDED)));
    // Add Methionine Oxidation before Met Ox. Test: +16 finds it.
    UpdateMatcher(new[] { carbC }, null, new[] { METHIONINE_OXIDATION, MET_OX_ROUNDED }, null);
    Assert.IsFalse(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, MET_OX_ROUNDED)));
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_CYS_AND_OXI).
        ExplicitMods.StaticModifications.Contains(mod => Equals(mod.Modification, METHIONINE_OXIDATION)));
    // Test long masses rounded.
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_METOX_LONG_MASS).ExplicitMods.StaticModifications.Contains(mod =>
        Equals(mod.Modification, METHIONINE_OXIDATION)));

    // Test UniMod label types
    var node = MATCHER.GetModifiedNode(STR_UNIMOD_LABEL);
    Assert.IsNotNull(node);
    Assert.IsNull(node.ExplicitMods.StaticModifications);
    Assert.IsTrue(node.ExplicitMods.HeavyModifications.Contains(mod => Equals(mod.Modification, N_TERM_LABEL)));
    UpdateMatcherWithNoSequences(new[] { carbC }, new[] { N_TERM_LABEL }, new[] { METHIONINE_OXIDATION, MET_OX_ROUNDED }, null);
    var nodeNew = MATCHER.GetModifiedNode(STR_UNIMOD_LABEL);
    Assert.IsNotNull(nodeNew);
    Assert.IsTrue(nodeNew.TransitionGroups.Any(group => Equals(group.TransitionGroup.LabelType, IsotopeLabelType.heavy)));
    UpdateMatcher(new[] { carbC }, null, new[] { METHIONINE_OXIDATION, MET_OX_ROUNDED }, null);

    // Test case where there are lots of unimod labels
    // (expected value goes first so a failure message reports the right sides)
    var nodeUniAll = MATCHER.GetModifiedNode(STR_UNIMOD_ALL);
    Assert.AreEqual(10, nodeUniAll.ExplicitMods.HeavyModifications.Count);
    Assert.IsNull(nodeUniAll.ExplicitMods.StaticModifications);
    foreach (var mod in nodeUniAll.ExplicitMods.HeavyModifications)
    {
        Assert.AreEqual("+01", mod.Modification.ShortName);
        Assert.AreEqual(994, mod.Modification.UnimodId);
    }

    // Test unimod terminal label
    var nodeUniTerm = MATCHER.GetModifiedNode(STR_UNIMOD_TERMINUS);
    Assert.AreEqual(1, nodeUniTerm.ExplicitMods.HeavyModifications.Count);
    Assert.IsNull(nodeUniTerm.ExplicitMods.StaticModifications);
    Assert.AreEqual(ModTerminus.C, nodeUniTerm.ExplicitMods.HeavyModifications[0].Modification.Terminus);
    Assert.AreEqual(298, nodeUniTerm.ExplicitMods.HeavyModifications[0].Modification.UnimodId);

    // Basic multi-label test
    var heavyLabelType2 = new IsotopeLabelType("Heavy2", 1);
    var typedMod = new TypedModifications(heavyLabelType2, new List<StaticMod> { LABEL15_N });
    var peptideMods = new PeptideModifications(new List<StaticMod>(), new List<TypedModifications> { typedMod });
    var settingsMultiLabel = SrmSettingsList.GetDefault().ChangePeptideModifications(mods => peptideMods);
    var defSetSetLight = new MappedList<string, StaticMod>();
    defSetSetLight.AddRange(StaticModList.GetDefaultsOn());
    var defSetHeavy = new MappedList<string, StaticMod>();
    defSetHeavy.AddRange(HeavyModList.GetDefaultsOn());
    defSetHeavy.Add( LABEL15_N );
    MATCHER.CreateMatches(settingsMultiLabel, new List<string> { STR_HEAVY_15_F }, defSetSetLight, defSetHeavy);
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_F).ExplicitMods.GetHeavyModifications().Contains(mod =>
        Equals(mod.LabelType, heavyLabelType2)));
    // Peptide settings should not change.
    var docNew0 = new SrmDocument(settingsMultiLabel).AddPeptideGroups(new[] { new PeptideGroupDocNode(new PeptideGroup(), "PepGroup1", "",
        new[] {MATCHER.GetModifiedNode(STR_HEAVY_15_F)})}, true, null, out firstAdded, out nextAdded);
    var settingsNew = MATCHER.GetDocModifications(docNew0);
    Assert.AreEqual(settingsMultiLabel.PeptideSettings.Modifications, settingsNew);

    // Finding specific modifications.
    // If only specific AA modified, try for most specific modification.
    UpdateMatcher(null, null, null, null, new[] { STR_HEAVY_15_F});
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_F)
        .ExplicitMods.HeavyModifications.Contains(mod => mod.Modification.AminoAcids.Contains(c => c == 'F')));
    // If only some AAs modified, try for most specific modifications.
    UpdateMatcher(null, null, null, null, new[] { STR_HEAVY_15_NOT_ALL });
    Assert.IsTrue(MATCHER.GetModifiedNode(STR_HEAVY_15_NOT_ALL)
        .ExplicitMods.HeavyModifications.Contains(mod => mod.Modification.AminoAcids.Contains(c => c == 'I')));

    using (var testDir = new TestFilesDir(TestContext, ZIP_FILE))
    {
        var modMatchDocContainer = InitMatchDocContainer(testDir);
        var libkeyModMatcher = new LibKeyModificationMatcher();
        var anlLibSpec = new BiblioSpecLiteSpec("ANL_Combo", testDir.GetTestPath("ANL_Combined.blib"));
        var yeastLibSpec = new BiblioSpecLiteSpec("Yeast", testDir.GetTestPath("Yeast_atlas_small.blib"));
        modMatchDocContainer.ChangeLibSpecs(new[] { anlLibSpec, yeastLibSpec });
        var docLibraries = modMatchDocContainer.Document.Settings.PeptideSettings.Libraries.Libraries;
        int anlLibIndex = docLibraries.IndexOf(library => Equals(library.Name, anlLibSpec.Name));
        int yeastLibIndex = docLibraries.IndexOf(library => Equals(library.Name, yeastLibSpec.Name));

        libkeyModMatcher.CreateMatches(modMatchDocContainer.Document.Settings,
            docLibraries[anlLibIndex].Keys, defSetSetLight, defSetHeavy);

        // Test can match 15N
        Assert.IsTrue(libkeyModMatcher.Matches.Values.Contains(match =>
            match.HeavyMod != null && match.HeavyMod.Equivalent(LABEL15_N)));

        var uniModMetOx = UniMod.GetModification("Oxidation (M)", true);

        // Test can match Met Ox
        Assert.IsTrue(libkeyModMatcher.Matches.Values.Contains(match =>
            match.StructuralMod != null && match.StructuralMod.Equivalent(uniModMetOx)));

        // Test can match 15N and Met ox!
        Assert.IsTrue(libkeyModMatcher.Matches.Contains(match => match.Key.Mass == 17
            && match.Value.StructuralMod != null && match.Value.StructuralMod.Equivalent(uniModMetOx)
            && match.Value.HeavyMod != null && match.Value.HeavyMod.Equivalent(LABEL15_N)));

        // Test can match Cysteine (Implicit) and Met Ox (variable)
        libkeyModMatcher.CreateMatches(modMatchDocContainer.Document.Settings,
            docLibraries[yeastLibIndex].Keys, defSetSetLight, defSetHeavy);
        Assert.IsTrue(libkeyModMatcher.MatcherPepMods.StaticModifications.Contains(mod =>
            mod.Formula.Equals(UniMod.GetModification(StaticModList.DEFAULT_NAME, true).Formula) && !mod.IsVariable));
        Assert.IsTrue(libkeyModMatcher.MatcherPepMods.StaticModifications.Contains(mod =>
            mod.Formula.Equals("O") && mod.IsVariable));
    }
}
/// <summary>
/// Adds all peptide matches that carry protein information to their proteins,
/// creating proteins that are not yet in the document, and returns a new document
/// with those proteins and peptides added.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="dictCopy">A dictionary of peptides to peptide matches. This method
/// only enumerates its values</param>
/// <param name="broker">For reporting long wait status; may be null</param>
/// <param name="toPath">Path to the location in the document to add new items</param>
/// <param name="selectedPath">Path to item in the document that should be selected
/// after this operation is complete</param>
/// <returns>A new document with matching peptides and their proteins added</returns>
private SrmDocument AddProteomePeptides(SrmDocument document, Dictionary<PeptideSequenceModKey, PeptideMatch> dictCopy, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    // Build a list of new PeptideGroupDocNodes to add to the document.
    var dictPeptideGroupsNew = new Dictionary<string, PeptideGroupDocNode>();

    // Get starting progress values
    int startPercent = (broker != null ? broker.ProgressValue : 0);
    int processedPercent = 0;
    int processedCount = 0;
    int totalMatches = dictCopy.Count;

    // Just to make sure this is set
    selectedPath = toPath;

    foreach (PeptideMatch pepMatch in dictCopy.Values)
    {
        // Show progress, if in a long wait
        if (broker != null)
        {
            if (broker.IsCanceled)
            {
                selectedPath = toPath;
                return document;
            }
            // All peptides with protein get processed in this loop.  Peptides
            // without proteins get added later.
            if (pepMatch.Proteins != null)
            {
                processedCount++;
            }
            int processPercentNow = processedCount * (100 - startPercent) / totalMatches;
            if (processedPercent != processPercentNow)
            {
                processedPercent = processPercentNow;
                broker.ProgressValue = startPercent + processedPercent;
            }
        }
        // Peptide should be added to the document,
        // unless the NoDuplicates radio was selected and the peptide has more than 1 protein associated with it.
        if (pepMatch.Proteins == null ||
            (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.NoDuplicates && pepMatch.Proteins.Count > 1))
        {
            continue;
        }

        foreach (ProteinInfo protein in pepMatch.Proteins)
        {
            // Look for the protein in the document.
            string name = protein.ProteinMetadata.Name;
            var peptideGroupDocNode = FindPeptideGroupDocNode(document, name);
            bool foundInDoc = peptideGroupDocNode != null;
            bool foundInList = false;
            if (!foundInDoc)
            {
                // If the protein is not already in the document,
                // check to see if we have already created a PeptideGroupDocNode for it.
                // A null name cannot be used as a dictionary key (TryGetValue would throw),
                // so a nameless protein always gets a new node.
                if (name != null && dictPeptideGroupsNew.TryGetValue(name, out peptideGroupDocNode))
                {
                    foundInList = true;
                }
                // If not, create a new PeptideGroupDocNode.
                else
                {
                    List<ProteinMetadata> alternativeProteins = new List<ProteinMetadata>(protein.Alternatives);
                    peptideGroupDocNode = new PeptideGroupDocNode(
                        new FastaSequence(name, protein.ProteinMetadata.Description, alternativeProteins, protein.Sequence),
                        null, null, new PeptideDocNode[0]);
                }
            }
            // Create a new peptide that matches this protein.
            var fastaSequence = peptideGroupDocNode.PeptideGroup as FastaSequence;
            var peptideSequence = pepMatch.NodePep.Peptide.Sequence;
            // ReSharper disable PossibleNullReferenceException
            var begin = fastaSequence.Sequence.IndexOf(peptideSequence, StringComparison.Ordinal);
            // ReSharper restore PossibleNullReferenceException
            // Create a new PeptideDocNode using this peptide.
            var newPeptide = new Peptide(fastaSequence, peptideSequence, begin, begin + peptideSequence.Length,
                Settings.PeptideSettings.Enzyme.CountCleavagePoints(peptideSequence));
            // Make sure we keep the same children.
            PeptideMatch match = pepMatch;
            var newNodePep = ((PeptideDocNode) new PeptideDocNode(newPeptide, pepMatch.NodePep.ExplicitMods, pepMatch.NodePep.ExplicitRetentionTime)
                .ChangeChildren(pepMatch.NodePep.Children.ToList().ConvertAll(nodeGroup =>
                {
                    // Create copies of the children in order to prevent transition groups with the same
                    // global indices.
                    var nodeTranGroup = (TransitionGroupDocNode) nodeGroup;
                    if (match.Proteins != null && match.Proteins.Count > 1)
                    {
                        nodeTranGroup = (TransitionGroupDocNode) nodeTranGroup.CopyId();
                        nodeTranGroup = (TransitionGroupDocNode) nodeTranGroup.ChangeChildren(
                            nodeTranGroup.Children.ToList().ConvertAll(nodeTran => nodeTran.CopyId()));
                    }
                    return (DocNode) nodeTranGroup;
                })).ChangeAutoManageChildren(false)).ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
            // If this PeptideDocNode is already a child of the PeptideGroupDocNode,
            // ignore it.
            if (peptideGroupDocNode.Children.Contains(nodePep => Equals(((PeptideDocNode) nodePep).Key, newNodePep.Key)))
            {
                Console.WriteLine(Resources.ViewLibraryPepMatching_AddProteomePeptides_Skipping__0__already_present, newNodePep.Peptide.Sequence);
                continue;
            }
            // Otherwise, add it to the list of children for the PeptideGroupNode,
            // keeping the children sorted.
            var newChildren = peptideGroupDocNode.Children.Cast<PeptideDocNode>().ToList();
            newChildren.Add(newNodePep);
            newChildren.Sort(FastaSequence.ComparePeptides);

            var newPeptideGroupDocNode = peptideGroupDocNode.ChangeChildren(newChildren.Cast<DocNode>().ToArray())
                .ChangeAutoManageChildren(false);
            // If the protein was already in the document, replace with the new PeptideGroupDocNode.
            if (foundInDoc)
            {
                document = (SrmDocument) document.ReplaceChild(newPeptideGroupDocNode);
            }
            // Otherwise, update the list of new PeptideGroupDocNodes to add.
            else
            {
                if (foundInList)
                {
                    dictPeptideGroupsNew.Remove(peptideGroupDocNode.Name);
                }
                dictPeptideGroupsNew.Add(peptideGroupDocNode.Name, (PeptideGroupDocNode) newPeptideGroupDocNode);
            }
            // If we are only adding a single node, select it.
            if (PeptideMatches.Count == 1)
            {
                selectedPath = new IdentityPath(new[] {peptideGroupDocNode.Id, newNodePep.Peptide});
            }
            // If the user only wants to add the first protein found,
            // we break the foreach loop after peptide has been added to its first protein.
            if (FilterMultipleProteinMatches == BackgroundProteome.DuplicateProteinsFilter.FirstOccurence)
            {
                break;
            }
        }
    }

    if (dictPeptideGroupsNew.Count == 0)
    {
        return document;
    }

    // Sort the peptides.
    var nodePepGroupsSortedChildren = new List<PeptideGroupDocNode>();
    foreach (PeptideGroupDocNode nodePepGroup in dictPeptideGroupsNew.Values)
    {
        var newChildren = nodePepGroup.Children.ToList();
        // Have to cast all children to PeptideDocNodes in order to sort.
        var newChildrenNodePeps = newChildren.Cast<PeptideDocNode>().ToList();
        newChildrenNodePeps.Sort(FastaSequence.ComparePeptides);
        nodePepGroupsSortedChildren.Add((PeptideGroupDocNode)
            nodePepGroup.ChangeChildren(newChildrenNodePeps.Cast<DocNode>().ToArray()));
    }
    // Sort the proteins.
    nodePepGroupsSortedChildren.Sort((node1, node2) => Comparer<string>.Default.Compare(node1.Name, node2.Name));
    IdentityPath selPathTemp = selectedPath, nextAdd;
    document = document.AddPeptideGroups(nodePepGroupsSortedChildren, false,
        toPath, out selectedPath, out nextAdd);
    // A single added node keeps the selection made inside the loop.
    selectedPath = PeptideMatches.Count == 1 ? selPathTemp : selectedPath;
    return document;
}
/// <summary>
/// Adds the peptide nodes of the given matches to the group named by the
/// Library_Peptides resource. A group already in the document under that name is
/// extended; otherwise a new group is added at <paramref name="toPath"/>.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="listMatches">The matches whose peptide nodes are to be added</param>
/// <param name="broker">For reporting long wait status and cancellation; may be null</param>
/// <param name="toPath">Path to the location in the document to add a new group</param>
/// <param name="selectedPath">Path to the item that should be selected after this
/// operation is complete; null if the operation was canceled</param>
/// <returns>A new document with the peptides added, or the starting document when
/// the operation was canceled or there was nothing to add</returns>
private static SrmDocument AddPeptidesToLibraryGroup(SrmDocument document, ICollection<PeptideMatch> listMatches, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    // Get starting progress values
    int startPercent = (broker != null ? broker.ProgressValue : 0);
    int processedPercent = 0;
    int processedCount = 0;
    int totalMatches = listMatches.Count;

    var listPeptides = new List<PeptideDocNode>();
    foreach (var match in listMatches)
    {
        // Show progress, if in a long wait
        if (broker != null)
        {
            if (broker.IsCanceled)
            {
                selectedPath = null;
                return document;
            }
            processedCount++;
            // Scale into the part of the progress range that is still unused.
            int processPercentNow = processedCount * (100 - startPercent) / totalMatches;
            if (processedPercent != processPercentNow)
            {
                processedPercent = processPercentNow;
                broker.ProgressValue = startPercent + processedPercent;
            }
        }

        listPeptides.Add(match.NodePep.ChangeSettings(document.Settings, SrmSettingsDiff.ALL));
    }

    // Nothing to add. Without this guard the new-group path below would add an empty
    // group and then fail indexing its first child.
    if (listPeptides.Count == 0)
    {
        selectedPath = toPath;
        return document;
    }

    bool hasVariable =
        listPeptides.Contains(nodePep => nodePep.HasExplicitMods && nodePep.ExplicitMods.IsVariableStaticMods);

    // Use existing group by this name, if present.
    var nodePepGroupNew = FindPeptideGroupDocNode(document, Resources.ViewLibraryPepMatching_AddPeptidesToLibraryGroup_Library_Peptides);
    if (nodePepGroupNew != null)
    {
        var newChildren = nodePepGroupNew.Children.ToList();
        newChildren.AddRange(listPeptides.ConvertAll(nodePep => (DocNode) nodePep));
        // A single added peptide is selected directly; otherwise fall back to the insert location.
        selectedPath = (listPeptides.Count == 1
            ? new IdentityPath(nodePepGroupNew.Id, listPeptides[0].Id)
            : toPath);
        nodePepGroupNew = (PeptideGroupDocNode) nodePepGroupNew.ChangeChildren(newChildren);
        if (hasVariable)
        {
            nodePepGroupNew = (PeptideGroupDocNode) nodePepGroupNew.ChangeAutoManageChildren(false);
        }
        return (SrmDocument) document.ReplaceChild(nodePepGroupNew);
    }
    else
    {
        nodePepGroupNew = new PeptideGroupDocNode(new PeptideGroup(),
            Resources.ViewLibraryPepMatching_AddPeptidesToLibraryGroup_Library_Peptides,
            string.Empty, listPeptides.ToArray());
        if (hasVariable)
        {
            nodePepGroupNew = (PeptideGroupDocNode) nodePepGroupNew.ChangeAutoManageChildren(false);
        }
        IdentityPath nextAdd;
        document = document.AddPeptideGroups(new[] { nodePepGroupNew }, true,
            toPath, out selectedPath, out nextAdd);
        // Select the first peptide of the newly added group.
        selectedPath = new IdentityPath(selectedPath, nodePepGroupNew.Children[0].Id);
        return document;
    }
}
/// <summary>
/// Imports the peptide groups read from a FASTA file into the document and reports
/// how many imported groups the importer counted as empty.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="fastaPath">Path to the FASTA file to read</param>
/// <param name="monitor">Progress monitor passed to the importer</param>
/// <param name="to">Path to the location in the document at which to add the new
/// groups; passed through to AddPeptideGroups</param>
/// <param name="firstAdded">Path to the first added group, as reported by AddPeptideGroups</param>
/// <param name="nextAdd">Path for a subsequent add, as reported by AddPeptideGroups</param>
/// <param name="emptyProteinCount">The importer's EmptyPeptideGroupCount after the import</param>
/// <returns>A new document with the imported groups added</returns>
public static SrmDocument ImportFasta(SrmDocument document, string fastaPath, IProgressMonitor monitor, IdentityPath to, out IdentityPath firstAdded, out IdentityPath nextAdd, out int emptyProteinCount)
{
    var importer = new FastaImporter(document, false);
    using (TextReader reader = File.OpenText(fastaPath))
    {
        // The groups are added while the reader is still open, so the import
        // result is consumed before the file is closed.
        var peptideGroupsNew = importer.Import(reader, monitor, Helpers.CountLinesInFile(fastaPath));
        // Honor the caller's insert location instead of silently ignoring it.
        document = document.AddPeptideGroups(peptideGroupsNew, false, to, out firstAdded, out nextAdd);
    }
    // Read the count only after the import above has been consumed.
    emptyProteinCount = importer.EmptyPeptideGroupCount;
    return document;
}
/// <summary>
/// Adds the document nodes of the given matches to library groups, one group per
/// distinct library protein name. A group takes the protein name only when that name
/// is non-empty and none of its nodes is proteomic; otherwise it uses the generic
/// Library_Molecules / Library_Peptides resource name. A group already in the document
/// under the chosen name is extended; otherwise a new group is added.
/// </summary>
/// <param name="document">The starting document</param>
/// <param name="listMatches">The matches whose nodes are to be added</param>
/// <param name="broker">For reporting long wait status and cancellation; may be null</param>
/// <param name="toPath">Path to the location in the document to add new groups</param>
/// <param name="selectedPath">Path to the item that should be selected after this
/// operation is complete (determined by the last group processed); null if the
/// operation was canceled or there were no matches</param>
/// <returns>A new document with the nodes added, or the starting document if the
/// operation was canceled</returns>
private static SrmDocument AddPeptidesToLibraryGroup(SrmDocument document, ICollection<PeptideMatch> listMatches, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    // Kept so that a cancellation hands back the document exactly as received,
    // rather than one with only some of the groups added.
    var documentOriginal = document;

    // Get starting progress values
    int startPercent = (broker != null ? broker.ProgressValue : 0);
    int processedPercent = 0;
    int processedCount = 0;
    int totalMatches = listMatches.Count;

    selectedPath = null;

    // Some .blib files provide protein accessions (understood as Molecule List Names for small molecules).
    // If those are provided we will use them as node names.
    // TODO(bspratt) for now we will do this only for small molecules since it might be a surprise to proteomics users.  We should revisit that decision.
    // The matches are grouped in one pass; groups come out in order of first appearance.
    foreach (var proteinMatches in listMatches.GroupBy(m => m.LibInfo.Protein))
    {
        var proteinName = proteinMatches.Key;
        var listPeptides = new List<PeptideDocNode>();
        var hasSmallMolecules = false;
        foreach (var match in proteinMatches)
        {
            // Show progress, if in a long wait
            if (broker != null)
            {
                if (broker.IsCanceled)
                {
                    selectedPath = null;
                    return documentOriginal;
                }
                processedCount++;
                // Scale into the part of the progress range that is still unused.
                int processPercentNow = processedCount * (100 - startPercent) / totalMatches;
                if (processedPercent != processPercentNow)
                {
                    processedPercent = processPercentNow;
                    broker.ProgressValue = startPercent + processedPercent;
                }
            }

            listPeptides.Add(match.NodePep.ChangeSettings(document.Settings, SrmSettingsDiff.ALL));
            hasSmallMolecules |= !match.NodePep.IsProteomic;
        }

        bool hasVariable = listPeptides.Contains(nodePep => nodePep.HasExplicitMods && nodePep.ExplicitMods.IsVariableStaticMods);

        // Use existing group by this name, if present.
        // If library provides a RefSpectraProteins table, use that to name the group
        // TODO(bspratt) for now we will use RefSpectraProteins names only for small molecules
        var genericLibraryPeptidesGroupName = hasSmallMolecules
            ? Resources.ViewLibraryPepMatching_AddPeptidesToLibraryGroup_Library_Molecules
            : Resources.ViewLibraryPepMatching_AddPeptidesToLibraryGroup_Library_Peptides;
        var nodeName = (string.IsNullOrEmpty(proteinName) || listPeptides.Any(p => p.IsProteomic)) // TODO(bspratt) revisit this caution-driven decision
            ? genericLibraryPeptidesGroupName
            : proteinName;
        var nodePepGroupNew = FindPeptideGroupDocNode(document, nodeName, null);
        if (nodePepGroupNew != null)
        {
            var newChildren = nodePepGroupNew.Children.ToList();
            newChildren.AddRange(listPeptides.ConvertAll(nodePep => (DocNode)nodePep));
            // A single added node is selected directly; otherwise fall back to the insert location.
            selectedPath = (listPeptides.Count == 1
                ? new IdentityPath(nodePepGroupNew.Id, listPeptides[0].Id)
                : toPath);
            nodePepGroupNew = (PeptideGroupDocNode)nodePepGroupNew.ChangeChildren(newChildren);
            if (hasVariable)
            {
                nodePepGroupNew = (PeptideGroupDocNode)nodePepGroupNew.ChangeAutoManageChildren(false);
            }
            document = (SrmDocument)document.ReplaceChild(nodePepGroupNew);
        }
        else
        {
            nodePepGroupNew = new PeptideGroupDocNode(new PeptideGroup(), nodeName, string.Empty, listPeptides.ToArray());
            if (hasVariable)
            {
                nodePepGroupNew = (PeptideGroupDocNode)nodePepGroupNew.ChangeAutoManageChildren(false);
            }
            IdentityPath nextAdd;
            document = document.AddPeptideGroups(new[] { nodePepGroupNew }, true, toPath, out selectedPath, out nextAdd);
            // Select the first node of the newly added group. Every group here has
            // at least one match, so the first child exists.
            selectedPath = new IdentityPath(selectedPath, nodePepGroupNew.Children[0].Id);
        }
    }
    return document;
}