/// <summary>
/// Builds the string that MSstats code uses to identify a row of data in the MSstats Input report.
/// </summary>
/// <param name="document">Document in which the nodes are looked up.</param>
/// <param name="identityPath">Path whose identities 0 through 3 locate, in order, the peptide group,
/// the peptide, the transition group and the transition.</param>
/// <returns>The modified sequence display text, precursor charge, fragment ion and product charge,
/// joined with underscores.</returns>
private static string GetFeatureKey(SrmDocument document, IdentityPath identityPath)
{
    const char separator = '_';

    // Walk down the document tree one level at a time.
    var nodeGroup = (PeptideGroupDocNode) document.FindNode(identityPath.GetIdentity(0));
    var nodePeptide = (PeptideDocNode) nodeGroup.FindNode(identityPath.GetIdentity(1));
    var nodeTranGroup = (TransitionGroupDocNode) nodePeptide.FindNode(identityPath.GetIdentity(2));
    var nodeTran = (TransitionDocNode) nodeTranGroup.FindNode(identityPath.GetIdentity(3));

    return nodePeptide.ModifiedSequenceDisplay
           + separator + nodeTranGroup.PrecursorCharge
           + separator + GetFragmentIon(nodeTran)
           + separator + nodeTran.Transition.Charge;
}
public void IdentityPathTest()
{
    // --- A path made of a single identity ---
    SimpleIdentity soleId = new SimpleIdentity();
    IdentityPath solePath = new IdentityPath(soleId);
    Assert.AreEqual(IdentityPath.ROOT, solePath.Parent);
    Assert.AreEqual(IdentityPath.ROOT, solePath.GetPathTo(-1));
    Assert.AreEqual(soleId, solePath.Child);
    Assert.AreEqual(1, solePath.Length);
    Assert.AreEqual(0, solePath.Depth);
    // Depths outside [-1, Depth] are expected to throw.
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => solePath.GetPathTo(-2));
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => solePath.GetPathTo(1));

    // --- A path built from an empty identity array equals ROOT ---
    IdentityPath emptyPath = new IdentityPath(new Identity[0]);
    Assert.AreEqual(IdentityPath.ROOT, emptyPath);
    Assert.AreEqual(0, emptyPath.Length);
    Assert.AreEqual(-1, emptyPath.Depth);
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => emptyPath.Child);
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => emptyPath.Parent);

    // --- Paths of increasing length ---
    const int count = 5;
    List<Identity> idsPrimary = new List<Identity>();
    // Same numeric values as idsPrimary, but distinct identity instances.
    List<Identity> idsTwin = new List<Identity>();
    HashSet<IdentityPath> seenPaths = new HashSet<IdentityPath>();
    IdentityPath previous = IdentityPath.ROOT;
    for (int depth = 0; depth < count; depth++)
    {
        idsPrimary.Add(new NumericIdentity(depth));
        idsTwin.Add(new NumericIdentity(depth));

        IdentityPath current = new IdentityPath(idsPrimary);

        // Equality follows the identity instances, not their numeric content.
        Assert.AreEqual(current, new IdentityPath(idsPrimary));
        Assert.AreNotEqual(current, new IdentityPath(idsTwin));

        // Parent/child decomposition and re-composition.
        Assert.AreEqual(previous, current.Parent);
        Assert.AreSame(idsPrimary[depth], current.Child);
        Assert.AreEqual(current, new IdentityPath(current.Parent, current.Child));

        // Depth, length and indexed access.
        Assert.AreEqual(depth, current.Depth);
        Assert.AreEqual(idsPrimary.Count, current.Length);
        Assert.AreSame(current.Child, current.GetIdentity(current.Depth));
        Assert.AreEqual(current.Parent, current.GetPathTo(current.Depth - 1));
        Assert.AreEqual("/" + idsPrimary.ToString("/"), current.ToString());

        // Every proper prefix was added to the set on an earlier iteration.
        for (int prefixDepth = 0; prefixDepth < depth; prefixDepth++)
        {
            Assert.IsTrue(seenPaths.Contains(current.GetPathTo(prefixDepth)));
        }

        seenPaths.Add(current);
        previous = current;
    }
}
/// <summary>
/// Tests whether <paramref name="ancestor"/> is a prefix of <paramref name="child"/>, comparing
/// the identities at each level by reference.
/// </summary>
/// <param name="child">The path that may lie at or below the ancestor.</param>
/// <param name="ancestor">The candidate prefix path.</param>
/// <returns>True if every identity of <paramref name="ancestor"/> is the same object as the
/// identity at the same index in <paramref name="child"/>; false otherwise, including when
/// <paramref name="child"/> is the shorter path.</returns>
protected static bool StartsWith(IdentityPath child, IdentityPath ancestor)
{
    int childLength = child.Length;
    int ancestorLength = ancestor.Length;

    // A shorter path cannot contain a longer one as its prefix.
    if (ancestorLength > childLength)
        return false;

    for (int level = 0; level < ancestorLength; level++)
    {
        bool sameIdentity = ReferenceEquals(ancestor.GetIdentity(level), child.GetIdentity(level));
        if (!sameIdentity)
            return false;
    }
    return true;
}
/// <summary>
/// Adds the peptides listed in the peptide grid to the document.
/// </summary>
/// <param name="document">Document to add to.</param>
/// <param name="validating">When true, the Unimod confirmation prompt is skipped and the
/// document's modification settings are left unchanged.</param>
/// <param name="selectedPath">Insertion location; replaced with the path of any peptide group
/// created here.</param>
/// <returns>The given document when the peptide list tab is not selected, the updated document
/// on success, or null when the grid could not be read, an error was shown, or the user
/// cancelled.</returns>
private SrmDocument AddPeptides(SrmDocument document, bool validating, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPagePeptideList)
        return document;

    var matcher = new ModificationMatcher();
    var sequences = ListPeptideSequences();
    if (sequences == null)
        return null;

    try
    {
        matcher.CreateMatches(document.Settings, sequences,
            Settings.Default.StaticModList, Settings.Default.HeavyModList);
    }
    catch (FormatException e)
    {
        MessageDlg.ShowException(this, e);
        ShowPeptideError(new PasteError
        {
            Column = colPeptideSequence.Index,
            Message = Resources.PasteDlg_AddPeptides_Unable_to_interpret_peptide_modifications
        });
        return null;
    }

    // Ask before using any modification matches the matcher reports.
    var foundMatches = matcher.FoundMatches;
    if (!validating && !string.IsNullOrEmpty(foundMatches))
    {
        string message = TextUtil.LineSeparate(
            Resources.PasteDlg_AddPeptides_Would_you_like_to_use_the_Unimod_definitions_for_the_following_modifications,
            string.Empty,
            foundMatches);
        if (MultiButtonMsgDlg.Show(this, message, Resources.PasteDlg_AddPeptides_OK) == DialogResult.Cancel)
            return null;
    }

    var backgroundProteome = GetBackgroundProteome(document);

    // Rows are processed last to first, so that proteins inserted at the same location
    // end up in grid order. Insertions into peptide lists are tracked with the two
    // variables below so that consecutive rows for the same list also keep grid order.
    int lastGroupGlobalIndex = 0, lastPeptideIndex = -1;
    for (int rowIndex = gridViewPeptides.Rows.Count - 1; rowIndex >= 0; rowIndex--)
    {
        var row = gridViewPeptides.Rows[rowIndex];
        var pepModSequence = Convert.ToString(row.Cells[colPeptideSequence.Index].Value);
        pepModSequence = FastaSequence.NormalizeNTerminalMod(pepModSequence);
        var proteinName = Convert.ToString(row.Cells[colPeptideProtein.Index].Value);
        bool hasProteinName = !string.IsNullOrEmpty(proteinName);

        // Completely blank rows are ignored.
        if (string.IsNullOrEmpty(pepModSequence) && !hasProteinName)
            continue;

        // Find the group that will receive the peptide.
        PeptideGroupDocNode nodeGroup;
        if (hasProteinName)
        {
            nodeGroup = FindPeptideGroupDocNode(document, proteinName);
        }
        else
        {
            // Without a protein name, only a selected peptide list is acceptable.
            nodeGroup = GetSelectedPeptideGroupDocNode(document, selectedPath);
            if (!IsPeptideListDocNode(nodeGroup))
                nodeGroup = null;
        }

        if (nodeGroup == null)
        {
            // No existing group to use, so create one.
            if (!hasProteinName)
            {
                nodeGroup = new PeptideGroupDocNode(new PeptideGroup(),
                    document.GetPeptideGroupId(true), null, new PeptideDocNode[0]);
            }
            else
            {
                ProteinMetadata metadata = null;
                PeptideGroup peptideGroup = backgroundProteome.IsNone
                    ? new PeptideGroup()
                    : (backgroundProteome.GetFastaSequence(proteinName, out metadata) ?? new PeptideGroup());
                if (metadata != null)
                {
                    nodeGroup = new PeptideGroupDocNode(peptideGroup, metadata, new PeptideDocNode[0]);
                }
                else
                {
                    nodeGroup = new PeptideGroupDocNode(peptideGroup, proteinName,
                        peptideGroup.Description, new PeptideDocNode[0]);
                }
            }

            // Append to the document unless the selection names a group to insert at.
            if (selectedPath == null || selectedPath.Depth < (int) SrmDocument.Level.MoleculeGroups)
            {
                document = (SrmDocument) document.Add(nodeGroup);
            }
            else
            {
                Identity insertAtId = selectedPath.GetIdentity((int) SrmDocument.Level.MoleculeGroups);
                document = (SrmDocument) document.Insert(insertAtId, nodeGroup);
            }
            selectedPath = new IdentityPath(nodeGroup.Id);
        }

        // Start from the group's current peptides.
        var peptides = new List<PeptideDocNode>();
        foreach (PeptideDocNode nodePepExisting in nodeGroup.Children)
        {
            peptides.Add(nodePepExisting);
        }

        var fastaSequence = nodeGroup.PeptideGroup as FastaSequence;
        if (fastaSequence != null)
        {
            // Try creating a node from the unmodified sequence purely as an error check.
            var nodePepCheck = fastaSequence.CreateFullPeptideDocNode(document.Settings,
                FastaSequence.StripModifications(pepModSequence));
            if (nodePepCheck == null)
            {
                ShowPeptideError(new PasteError
                {
                    Column = colPeptideSequence.Index,
                    Line = rowIndex,
                    Message = Resources.PasteDlg_AddPeptides_This_peptide_sequence_was_not_found_in_the_protein_sequence
                });
                return null;
            }
        }

        // The node actually added comes from the ModificationMatcher.
        PeptideDocNode nodePepAdd = matcher.GetModifiedNode(pepModSequence, fastaSequence)
            .ChangeSettings(document.Settings, SrmSettingsDiff.ALL);

        // Skip peptides the group already contains.
        if (!peptides.Contains(nodePep => Equals(nodePep.Key, nodePepAdd.Key)))
        {
            if (nodePepAdd.Peptide.FastaSequence != null)
            {
                peptides.Add(nodePepAdd);
                peptides.Sort(FastaSequence.ComparePeptides);
            }
            else
            {
                int groupGlobalIndex = nodeGroup.PeptideGroup.GlobalIndex;
                if (groupGlobalIndex == lastGroupGlobalIndex && lastPeptideIndex != -1)
                {
                    // Same list as the previous row handled: insert at the remembered
                    // position, ahead of the peptide added for that (later) grid row.
                    peptides.Insert(lastPeptideIndex, nodePepAdd);
                }
                else
                {
                    lastPeptideIndex = peptides.Count;
                    peptides.Add(nodePepAdd);
                }
                lastGroupGlobalIndex = groupGlobalIndex;
            }

            var nodeGroupNew = new PeptideGroupDocNode(nodeGroup.PeptideGroup,
                nodeGroup.Annotations,
                nodeGroup.Name,
                nodeGroup.Description,
                peptides.ToArray(),
                false);
            document = (SrmDocument) document.ReplaceChild(nodeGroupNew);
        }
    }

    if (!validating && sequences.Count > 0)
    {
        // Store the matcher's document modifications in the settings.
        var pepModsNew = matcher.GetDocModifications(document);
        document = document.ChangeSettings(document.Settings.ChangePeptideModifications(mods => pepModsNew));
        document.Settings.UpdateDefaultModifications(false);
    }
    return document;
}
/// <summary>
/// Adds the rows of the transition list grid to the document.
/// For small molecules (<c>IsMolecule</c>), each row is merged into or begins a
/// molecule group / molecule / transition group / transition tree. Otherwise the rows are
/// converted to separated text and imported with <c>MassListImporter</c>, merging the
/// resulting peptide groups into the document.
/// </summary>
/// <param name="document">Document to add to.</param>
/// <param name="selectedPath">Insertion location; replaced with the path of the last peptide
/// group inserted by the peptide branch.</param>
/// <returns>The given document when the transition list tab is not selected or no rows were
/// usable, the updated document on success, or null after an error has been shown.</returns>
private SrmDocument AddTransitionList(SrmDocument document, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPageTransitionList)
        return document;

    if (IsMolecule)
    {
        // Save the current column order to settings
        var active = new List<string>();
        for (int order = 0; order < gridViewTransitionList.Columns.Count; order++)
        {
            for (int gridcol = 0; gridcol < gridViewTransitionList.Columns.Count; gridcol++)
            {
                var dataGridViewColumn = gridViewTransitionList.Columns[gridcol];
                if (dataGridViewColumn.DisplayIndex == order)
                {
                    if (dataGridViewColumn.Visible)
                        active.Add(dataGridViewColumn.Name);
                    break;
                }
            }
        }
        Settings.Default.CustomMoleculeTransitionInsertColumnsList = active;

        // We will accept a completely empty product list as meaning
        // "these are all precursor transitions"
        // (RowCount - 1 leaves out the last row of the grid.)
        var requireProductInfo = false;
        for (var i = 0; i < gridViewTransitionList.RowCount - 1; i++)
        {
            var row = gridViewTransitionList.Rows[i];
            var productMz = row.Cells[INDEX_PRODUCT_MZ].Value;
            var productFormula = row.Cells[INDEX_PRODUCT_FORMULA].Value;
            var productCharge = row.Cells[INDEX_PRODUCT_CHARGE].Value;
            if ((productMz != null && productMz.ToString().Length > 0) ||
                (productFormula != null && productFormula.ToString().Length > 0) ||
                (productCharge != null && productCharge.ToString().Length > 0))
            {
                requireProductInfo = true; // Product list is not completely empty
                break;
            }
        }

        // For each row in the grid, add to or begin MoleculeGroup|Molecule|TransitionList tree
        for (int i = 0; i < gridViewTransitionList.RowCount - 1; i++)
        {
            DataGridViewRow row = gridViewTransitionList.Rows[i];
            var precursor = ReadPrecursorOrProductColumns(document, row, true); // Get molecule values
            if (precursor == null)
                return null;
            if (requireProductInfo && ReadPrecursorOrProductColumns(document, row, false) == null)
            {
                return null;
            }
            var charge = precursor.Charge;
            var precursorMonoMz = BioMassCalc.CalculateIonMz(precursor.MonoMass, charge);
            var precursorAverageMz = BioMassCalc.CalculateIonMz(precursor.AverageMass, charge);

            // Preexisting molecule group?
            bool pepGroupFound = false;
            foreach (var pepGroup in document.MoleculeGroups)
            {
                var pathPepGroup = new IdentityPath(pepGroup.Id);
                if (Equals(pepGroup.Name, Convert.ToString(row.Cells[INDEX_MOLECULE_GROUP].Value)))
                {
                    // Found a molecule group with the same name - can we find an existing
                    // transition group to which we can add a transition?
                    pepGroupFound = true;
                    bool pepFound = false;
                    foreach (var pep in pepGroup.SmallMolecules)
                    {
                        var pepPath = new IdentityPath(pathPepGroup, pep.Id);
                        var ionMonoMz = BioMassCalc.CalculateIonMz(pep.CustomIon.MonoisotopicMass, charge);
                        var ionAverageMz = BioMassCalc.CalculateIonMz(pep.CustomIon.AverageMass, charge);
                        // Match existing molecule if same name (if any) and same formula (if any) and similar m/z at the precursor charge
                        // (we don't just check mass since we don't have a tolerance value for that)
                        // Or same name (if any) and identical formula when stripped of labels
                        // Or same name, no formula, and different isotope labels
                        if (Equals(pep.CustomIon.Name, precursor.Name) &&
                            ((Equals(pep.CustomIon.Formula, precursor.Formula) &&
                              Math.Abs(ionMonoMz - precursorMonoMz) <= document.Settings.TransitionSettings.Instrument.MzMatchTolerance &&
                              Math.Abs(ionAverageMz - precursorAverageMz) <= document.Settings.TransitionSettings.Instrument.MzMatchTolerance) ||
                             (!Equals(pep.CustomIon.Formula, precursor.Formula) &&
                              Equals(pep.CustomIon.UnlabeledFormula, BioMassCalc.MONOISOTOPIC.StripLabelsFromFormula(precursor.Formula))) ||
                             (string.IsNullOrEmpty(pep.CustomIon.Formula) &&
                              string.IsNullOrEmpty(precursor.Formula) &&
                              !pep.TransitionGroups.Any(t => Equals(t.TransitionGroup.LabelType, precursor.IsotopeLabelType ?? IsotopeLabelType.light)))))
                        {
                            pepFound = true;
                            bool tranGroupFound = false;
                            foreach (var tranGroup in pep.TransitionGroups)
                            {
                                var pathGroup = new IdentityPath(pepPath, tranGroup.Id);
                                if (Math.Abs(tranGroup.PrecursorMz - precursor.Mz) <= document.Settings.TransitionSettings.Instrument.MzMatchTolerance)
                                {
                                    tranGroupFound = true;
                                    var tranFound = false;
                                    try
                                    {
                                        var tranNode = GetMoleculeTransition(document, row, pep.Peptide, tranGroup.TransitionGroup, requireProductInfo);
                                        if (tranNode == null)
                                            return null;
                                        foreach (var tran in tranGroup.Transitions)
                                        {
                                            if (Equals(tranNode.Transition.CustomIon, tran.Transition.CustomIon))
                                            {
                                                tranFound = true;
                                                break;
                                            }
                                        }
                                        if (!tranFound)
                                        {
                                            document = (SrmDocument) document.Add(pathGroup, tranNode);
                                        }
                                    }
                                    catch (InvalidDataException e)
                                    {
                                        // Some error we didn't catch in the basic checks
                                        ShowTransitionError(new PasteError
                                        {
                                            Column = 0,
                                            Line = row.Index,
                                            Message = e.Message
                                        });
                                        return null;
                                    }
                                    // Only the first transition group within tolerance is used.
                                    break;
                                }
                            }
                            if (!tranGroupFound)
                            {
                                var node = GetMoleculeTransitionGroup(document, row, pep.Peptide, requireProductInfo);
                                if (node == null)
                                    return null;
                                document = (SrmDocument) document.Add(pepPath, node);
                            }
                            // Only the first matching molecule is used.
                            break;
                        }
                    }
                    if (!pepFound)
                    {
                        var node = GetMoleculePeptide(document, row, pepGroup.PeptideGroup, requireProductInfo);
                        if (node == null)
                            return null;
                        document = (SrmDocument) document.Add(pathPepGroup, node);
                    }
                    // Only the first group with a matching name is used.
                    break;
                }
            }
            if (!pepGroupFound)
            {
                var node = GetMoleculePeptideGroup(document, row, requireProductInfo);
                if (node == null)
                    return null;
                IdentityPath first;
                IdentityPath next;
                document = document.AddPeptideGroups(new[] {node}, false, null, out first, out next);
            }
        }
    }
    else
    {
        var backgroundProteome = GetBackgroundProteome(document);
        var sbTransitionList = new StringBuilder();
        var dictNameSeq = new Dictionary<string, FastaSequence>();
        // Add all existing FASTA sequences in the document to the name to seq dictionary
        // Including named peptide lists would cause the import code to give matching names
        // in this list new names (e.g. with 1, 2, 3 appended). In this code, the names
        // are intended to be merged.
        foreach (var nodePepGroup in document.Children.Cast<PeptideGroupDocNode>().Where(n => !n.IsPeptideList))
        {
            if (!dictNameSeq.ContainsKey(nodePepGroup.Name))
                dictNameSeq.Add(nodePepGroup.Name, (FastaSequence) nodePepGroup.PeptideGroup);
        }

        // Check for simple errors and build strings for import
        for (int i = 0; i < gridViewTransitionList.Rows.Count; i++)
        {
            var row = gridViewTransitionList.Rows[i];
            var peptideSequence = Convert.ToString(row.Cells[colTransitionPeptide.Index].Value);
            var proteinName = Convert.ToString(row.Cells[colTransitionProteinName.Index].Value);
            var precursorMzText = Convert.ToString(row.Cells[colTransitionPrecursorMz.Index].Value);
            var productMzText = Convert.ToString(row.Cells[colTransitionProductMz.Index].Value);
            // Rows with neither a peptide nor a protein are skipped.
            if (string.IsNullOrEmpty(peptideSequence) && string.IsNullOrEmpty(proteinName))
            {
                continue;
            }
            if (string.IsNullOrEmpty(peptideSequence))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPeptide.Index,
                    Line = i,
                    Message = Resources.PasteDlg_ListPeptideSequences_The_peptide_sequence_cannot_be_blank
                });
                return null;
            }
            if (!FastaSequence.IsExSequence(peptideSequence))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPeptide.Index,
                    Line = i,
                    Message = Resources.PasteDlg_ListPeptideSequences_This_peptide_sequence_contains_invalid_characters
                });
                return null;
            }
            // The parsed value is not used; the text only has to be numeric.
            double mz;
            if (!double.TryParse(precursorMzText, out mz))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionPrecursorMz.Index,
                    Line = i,
                    Message = Resources.PasteDlg_AddTransitionList_The_precursor_m_z_must_be_a_number_
                });
                return null;
            }
            if (!double.TryParse(productMzText, out mz))
            {
                ShowTransitionError(new PasteError
                {
                    Column = colTransitionProductMz.Index,
                    Line = i,
                    Message = Resources.PasteDlg_AddTransitionList_The_product_m_z_must_be_a_number_
                });
                return null;
            }
            const char sep = TRANSITION_LIST_SEPARATOR;
            // Add columns in order specified by TRANSITION_LIST_COL_INDICES
            sbTransitionList
                .Append(proteinName).Append(sep)
                .Append(peptideSequence).Append(sep)
                .Append(precursorMzText).Append(sep)
                .Append(productMzText).AppendLine();
            // Build FASTA sequence text in cases where it is known
            if (!dictNameSeq.ContainsKey(proteinName))
            {
                var fastaSeq = backgroundProteome.GetFastaSequence(proteinName);
                if (fastaSeq != null)
                    dictNameSeq.Add(proteinName, fastaSeq);
            }
        }

        if (sbTransitionList.Length == 0)
            return document;

        // Do the actual import into PeptideGroupDocNodes
        IEnumerable<PeptideGroupDocNode> peptideGroupDocNodes;
        try
        {
            List<TransitionImportErrorInfo> errorList;
            List<MeasuredRetentionTime> irtPeptides;
            List<SpectrumMzInfo> librarySpectra;
            var inputs = new MassListInputs(sbTransitionList.ToString(), LocalizationHelper.CurrentCulture, TRANSITION_LIST_SEPARATOR);
            var importer = new MassListImporter(document, inputs);
            // TODO: support long-wait broker
            peptideGroupDocNodes = importer.Import(null,
                TRANSITION_LIST_COL_INDICES,
                dictNameSeq,
                out irtPeptides,
                out librarySpectra,
                out errorList);
            if (errorList.Any())
            {
                // Only the first error is reported, through the handlers below.
                var firstError = errorList[0];
                if (firstError.Row.HasValue)
                {
                    throw new LineColNumberedIoException(firstError.ErrorMessage, firstError.Row.Value, firstError.Column ?? -1);
                }
                else
                {
                    throw new InvalidDataException(firstError.ErrorMessage);
                }
            }
        }
        catch (LineColNumberedIoException x)
        {
            // Map the column position in the imported text back to a column of the
            // transition grid, in the order the text was built above. The peptide entry
            // must be the transition grid's own column, not the peptide grid's.
            var columns = new DataGridViewColumn[]
            {
                colTransitionProteinName,
                colTransitionPeptide,
                colTransitionPrecursorMz,
                colTransitionProductMz
            };
            // Unknown or out-of-range column indexes fall back to column 0.
            bool knownColumn = x.ColumnIndex >= 0 && x.ColumnIndex < columns.Length;
            ShowTransitionError(new PasteError
            {
                Column = knownColumn ? columns[x.ColumnIndex].Index : 0,
                Line = (int) x.LineNumber - 1,
                Message = x.PlainMessage
            });
            return null;
        }
        catch (InvalidDataException x)
        {
            ShowTransitionError(new PasteError
            {
                Message = x.Message
            });
            return null;
        }

        // Insert the resulting nodes into the document tree, merging when possible
        bool after = false;
        foreach (var nodePepGroup in peptideGroupDocNodes)
        {
            PeptideGroupDocNode nodePepGroupExist = FindPeptideGroupDocNode(document, nodePepGroup);
            if (nodePepGroupExist != null)
            {
                var nodePepGroupNew = nodePepGroupExist.Merge(nodePepGroup);
                if (!ReferenceEquals(nodePepGroupExist, nodePepGroupNew))
                    document = (SrmDocument) document.ReplaceChild(nodePepGroupNew);
            }
            else
            {
                // Add to the end, if no insert node
                var to = selectedPath;
                if (to == null || to.Depth < (int) SrmDocument.Level.MoleculeGroups)
                    document = (SrmDocument) document.Add(nodePepGroup);
                else
                {
                    Identity toId = selectedPath.GetIdentity((int) SrmDocument.Level.MoleculeGroups);
                    document = (SrmDocument) document.Insert(toId, nodePepGroup, after);
                }
                selectedPath = new IdentityPath(nodePepGroup.Id);
                // All future insertions should be after, to avoid reversing the list
                after = true;
            }
        }
    }
    return document;
}
/// <summary>
/// Adds the proteins listed in the protein grid to the document, one peptide group per row
/// that has a protein name. Rows are processed last to first.
/// </summary>
/// <param name="document">Document to add to.</param>
/// <param name="selectedPath">Insertion location; replaced with the path of each peptide group
/// as it is added.</param>
/// <returns>The given document when the protein list tab is not selected, the updated document
/// on success, or null after an error has been shown.</returns>
private SrmDocument AddProteins(SrmDocument document, ref IdentityPath selectedPath)
{
    if (tabControl1.SelectedTab != tabPageProteinList)
        return document;

    var backgroundProteome = GetBackgroundProteome(document);
    for (int i = gridViewProteins.Rows.Count - 1; i >= 0; i--)
    {
        var row = gridViewProteins.Rows[i];
        var proteinName = Convert.ToString(row.Cells[colProteinName.Index].Value);
        // Rows without a protein name are skipped.
        if (String.IsNullOrEmpty(proteinName))
        {
            continue;
        }
        var pastedMetadata = new ProteinMetadata(proteinName,
            Convert.ToString(row.Cells[colProteinDescription.Index].Value),
            NullForEmpty(Convert.ToString(row.Cells[colProteinPreferredName.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinAccession.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinGene.Index].Value)),
            NullForEmpty(Convert.ToString(row.Cells[colProteinSpecies.Index].Value)));
        FastaSequence fastaSequence = null;
        if (!backgroundProteome.IsNone)
        {
            ProteinMetadata protdbMetadata;
            fastaSequence = backgroundProteome.GetFastaSequence(proteinName, out protdbMetadata);
            // Fill in any gaps in pasted metadata with that in protdb
            pastedMetadata = pastedMetadata.Merge(protdbMetadata);
        }
        // Strip any whitespace (tab, newline etc) in case it was copied out of a FASTA file
        var fastaSequenceString = new string(Convert.ToString(row.Cells[colProteinSequence.Index].Value)
            .Where(c => !Char.IsWhiteSpace(c)).ToArray());
        if (!string.IsNullOrEmpty(fastaSequenceString))
        {
            try
            {
                if (fastaSequence == null) // Didn't match anything in protdb
                {
                    fastaSequence = new FastaSequence(pastedMetadata.Name, pastedMetadata.Description,
                        new ProteinMetadata[0], fastaSequenceString);
                }
                else
                {
                    // A pasted sequence that differs from the protdb one takes precedence.
                    if (fastaSequence.Sequence != fastaSequenceString)
                    {
                        fastaSequence = new FastaSequence(pastedMetadata.Name, pastedMetadata.Description,
                            fastaSequence.Alternatives, fastaSequenceString);
                    }
                }
            }
            catch (Exception exception)
            {
                // The failure comes from building a FastaSequence out of the pasted sequence
                // text, so report it against the sequence column.
                ShowProteinError(new PasteError
                {
                    Line = i,
                    Column = colProteinSequence.Index,
                    Message = string.Format(Resources.PasteDlg_AddProteins_Invalid_protein_sequence__0__, exception.Message)
                });
                return null;
            }
        }
        if (fastaSequence == null)
        {
            // Nothing pasted and nothing found in the background proteome.
            ShowProteinError(new PasteError
            {
                Line = i,
                Message = backgroundProteome.IsNone
                    ? Resources.PasteDlg_AddProteins_Missing_protein_sequence
                    : Resources.PasteDlg_AddProteins_This_protein_was_not_found_in_the_background_proteome_database
            });
            return null;
        }
        // A non-empty description in the metadata overrides the sequence's own description.
        var description = pastedMetadata.Description;
        if (!string.IsNullOrEmpty(description) && description != fastaSequence.Description)
        {
            fastaSequence = new FastaSequence(fastaSequence.Name, description,
                fastaSequence.Alternatives, fastaSequence.Sequence);
        }
        // Make sure these agree
        pastedMetadata = pastedMetadata.ChangeName(fastaSequence.Name).ChangeDescription(fastaSequence.Description);
        var nodeGroupPep = new PeptideGroupDocNode(fastaSequence, pastedMetadata, new PeptideDocNode[0]);
        nodeGroupPep = nodeGroupPep.ChangeSettings(document.Settings, SrmSettingsDiff.ALL);
        // Add to the end, unless the selection names a group to insert at.
        var to = selectedPath;
        if (to == null || to.Depth < (int) SrmDocument.Level.MoleculeGroups)
            document = (SrmDocument) document.Add(nodeGroupPep);
        else
        {
            Identity toId = selectedPath.GetIdentity((int) SrmDocument.Level.MoleculeGroups);
            document = (SrmDocument) document.Insert(toId, nodeGroupPep);
        }
        selectedPath = new IdentityPath(nodeGroupPep.Id);
    }
    return document;
}
public void ImportFastaTest()
{
    // Import the yeast FASTA text into an empty document.
    SrmDocument docEmpty = new SrmDocument(SrmSettingsList.GetDefault0_6());
    IdentityPath pathSel = IdentityPath.ROOT;
    SrmDocument docFasta = docEmpty.ImportFasta(new StringReader(ExampleText.TEXT_FASTA_YEAST),
        false, pathSel, out pathSel);
    AssertEx.IsDocumentState(docFasta, 1, 2, 98, 311);
    Assert.AreEqual("YAL001C", ((PeptideGroupDocNode) docFasta.Children[0]).Name);
    Assert.AreEqual("YAL002W", ((PeptideGroupDocNode) docFasta.Children[1]).Name);
    Assert.AreEqual(1, pathSel.Length);
    Assert.IsInstanceOfType(pathSel.GetIdentity(0), typeof(FastaSequence));
    Assert.AreEqual("YAL001C", ((FastaSequence) pathSel.GetIdentity(0)).Name);

    int maxMz = docFasta.Settings.TransitionSettings.Instrument.MaxMz - 120;
    foreach (PeptideGroupDocNode nodeProtein in docFasta.Children)
    {
        Assert.IsInstanceOfType(nodeProtein.Id, typeof(FastaSequence));

        int endPrevious = docFasta.Settings.PeptideSettings.Filter.ExcludeNTermAAs - 1;
        foreach (PeptideDocNode nodePep in nodeProtein.Children)
        {
            Peptide pep = nodePep.Peptide;

            // The residue before the peptide must be K or R.
            char aaBefore = pep.PrevAA;
            if (!(aaBefore == 'K' || aaBefore == 'R'))
            {
                Assert.Fail("Unexpected preceding cleavage at {0}", aaBefore);
            }

            // The peptide must end in K or R, unless the next residue is '-'.
            string sequence = pep.Sequence;
            char aaLast = sequence[sequence.Length - 1];
            if (aaLast != 'K' && aaLast != 'R' && pep.NextAA != '-')
            {
                Assert.Fail("Unexpected cleavage at {0}", aaLast);
            }

            Assert.IsNotNull(pep.Begin);
            Assert.IsNotNull(pep.End);

            // Peptides must appear in order, without overlapping.
            if (pep.Begin.Value < endPrevious)
            {
                Assert.Fail("Begin {0} less than last end {1}.", pep.Begin.Value, endPrevious);
            }
            endPrevious = pep.End.Value;

            IList<DocNode> transitions = ((DocNodeParent) nodePep.Children[0]).Children;
            int tranCount = transitions.Count;
            if (tranCount < 3)
            {
                // Fewer than 3 is acceptable only when the instrument limit may have cut them off.
                if ((tranCount == 0 && ((TransitionGroupDocNode) nodePep.Children[0]).PrecursorMz < maxMz) ||
                    (tranCount > 0 && ((TransitionDocNode) transitions[0]).Mz < maxMz))
                {
                    Assert.Fail("Found {0} transitions, expecting 3.", tranCount);
                }
            }
            // More than 3 is acceptable only for extra proline transitions.
            else if (tranCount > 3 && pep.Sequence.IndexOf('P') == -1)
            {
                Assert.Fail("Found {0} transitions, expecting 3.", tranCount);
            }

            // Transitions must be ordered by ion type, then by cleavage offset.
            IonType typePrevious = IonType.a;
            int offsetPrevious = -1;
            foreach (TransitionDocNode nodeTran in transitions)
            {
                Transition tran = nodeTran.Transition;
                if (typePrevious == tran.IonType)
                {
                    Assert.IsTrue(tran.CleavageOffset > offsetPrevious);
                }
                else
                {
                    Assert.IsTrue(((int) tran.IonType) > ((int) typePrevious));
                }
                typePrevious = tran.IonType;
                offsetPrevious = tran.CleavageOffset;
            }
        }
    }

    // The starting document must not have been modified.
    Assert.AreEqual(0, docEmpty.RevisionIndex);
    Assert.AreEqual(0, docEmpty.PeptideTransitionCount);

    // Importing the same FASTA again should have no impact.
    // null is used in place of IdentityPath.ROOT here.
    SrmDocument docFasta2 = docFasta.ImportFasta(new StringReader(ExampleText.TEXT_FASTA_YEAST),
        false, null, out pathSel);
    // The original document is returned, to avoid adding an undo record in the running app.
    Assert.AreSame(docFasta, docFasta2);
    Assert.IsNull(pathSel);

    // Discard the double-insert document, and add a peptide list to the previous document.
    pathSel = IdentityPath.ROOT;
    SrmDocument docPeptides = docFasta.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES1),
        true, pathSel, out pathSel);
    AssertEx.IsDocumentState(docPeptides, 2, 3, 111, 352);
    Assert.AreEqual(1, pathSel.Length);
    Assert.IsNotInstanceOfType(pathSel.GetIdentity(0), typeof(FastaSequence));
    Assert.AreEqual("Peptides1", ((PeptideGroupDocNode) docPeptides.FindNode(pathSel)).Name);
    PeptideGroupDocNode nodePepList = (PeptideGroupDocNode) docPeptides.Children[2];
    Assert.IsNotInstanceOfType(nodePepList.Id, typeof(FastaSequence));
    // The two FASTA nodes must be the very same objects as before.
    Assert.AreSame(docFasta.Children[0], docPeptides.Children[0]);
    Assert.AreSame(docFasta.Children[1], docPeptides.Children[1]);

    foreach (PeptideDocNode nodePep in nodePepList.Children)
    {
        char aaBefore = nodePep.Peptide.PrevAA;
        char aaAfter = nodePep.Peptide.NextAA;
        if (aaBefore != 'X' || aaAfter != 'X')
        {
            Assert.Fail("Expected amino acids X, but found {0} or {1}", aaBefore, aaAfter);
        }

        string sequence = nodePep.Peptide.Sequence;
        char aaLast = sequence[sequence.Length - 1];
        // Holds only because the peptides in the list are tryptic.
        if (aaLast != 'K' && aaLast != 'R' && nodePep.Peptide.NextAA != '-')
        {
            Assert.Fail("Unexpected cleavage at {0}", aaLast);
        }

        Assert.IsNull(nodePep.Peptide.Begin);
        Assert.IsNull(nodePep.Peptide.End);

        IList<DocNode> transitions = ((DocNodeParent) nodePep.Children[0]).Children;
        int tranCount = transitions.Count;
        if (tranCount < 3)
        {
            // Fewer than 3 is acceptable only when the instrument limit may have cut them off.
            if ((tranCount == 0 && ((TransitionGroupDocNode) nodePep.Children[0]).PrecursorMz < maxMz) ||
                (tranCount > 0 && ((TransitionDocNode) transitions[0]).Mz < maxMz))
            {
                Assert.Fail("Found {0} transitions, expecting 3.", tranCount);
            }
        }
        // More than 3 is acceptable only for extra proline transitions.
        else if (tranCount > 3 && nodePep.Peptide.Sequence.IndexOf('P') == -1)
        {
            Assert.Fail("Found {0} transitions, expecting 3.", tranCount);
        }
    }

    // None of the earlier documents may have been modified.
    AssertEx.IsDocumentState(docEmpty, 0, 0, 0, 0);
    AssertEx.IsDocumentState(docFasta, 1, 2, 98, 311);
    AssertEx.IsDocumentState(docPeptides, 2, 3, 111, 352);

    // Add peptides in all possible locations.
    // 0. Root (already done)

    // 1. Before another group
    pathSel = docPeptides.GetPathTo(0);
    SrmDocument docPeptides2 = docPeptides.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES1),
        true, pathSel, out pathSel);
    AssertEx.IsDocumentState(docPeptides2, 3, 4, 124, 393);
    Assert.IsNotInstanceOfType(docPeptides2.Children[0].Id, typeof(FastaSequence));
    Assert.AreEqual(docPeptides2.Children[0].Id, pathSel.GetIdentity(0));
    Assert.IsInstanceOfType(docPeptides2.Children[1].Id, typeof(FastaSequence));
    // Previously existing groups are unchanged, just shifted down by one.
    Assert.AreSame(docPeptides.Children[0], docPeptides2.Children[1]);
    Assert.AreSame(docPeptides.Children[1], docPeptides2.Children[2]);
    Assert.AreSame(docPeptides.Children[2], docPeptides2.Children[3]);

    // 2. Inside a FASTA group
    pathSel = docPeptides2.GetPathTo((int) SrmDocument.Level.Transitions, 100);
    SrmDocument docPeptides3 = docPeptides2.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES1),
        true, pathSel, out pathSel);
    AssertEx.IsDocumentState(docPeptides3, 4, 5, 137, 434);
    Assert.AreEqual(2, docPeptides3.FindNodeIndex(pathSel));
    // Previously existing groups are unchanged.
    Assert.AreSame(docPeptides2.Children[1], docPeptides3.Children[1]);
    Assert.AreSame(docPeptides2.Children[2], docPeptides3.Children[3]);

    // 3. To a peptide list
    // 3a. Same peptides
    pathSel = docPeptides2.GetPathTo(0);
    docPeptides3 = docPeptides2.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES1),
        true, pathSel, out pathSel);
    // Repeated peptides are no longer filtered, because they are useful for explicit modifications.
    Assert.AreNotSame(docPeptides2, docPeptides3);
    Assert.IsNotNull(pathSel);

    // 3b. Different peptides
    pathSel = docPeptides2.GetPathTo(0);
    // Taken from the 3a result, before docPeptides3 is reassigned below.
    IdentityPath pathFirstPep = docPeptides3.GetPathTo((int) SrmDocument.Level.Molecules, 0);
    docPeptides3 = docPeptides2.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES2),
        true, pathSel, out pathSel);
    AssertEx.IsDocumentState(docPeptides3, 4, 4, 140, 448);
    Assert.AreSame(docPeptides2.Children[0].Id, docPeptides3.Children[0].Id);
    Assert.AreNotSame(docPeptides2.Children[0], docPeptides3.Children[0]);
    Assert.AreEqual("LVTDLTK", ((PeptideDocNode) docPeptides3.FindNode(pathSel)).Peptide.Sequence);
    int indexSel = docPeptides3.FindNodeIndex(pathSel);
    IdentityPath pathBefore = docPeptides3.GetPathTo(pathSel.Depth, indexSel - 1);
    Assert.AreEqual("IVGYLDEEGVLDQNR", ((PeptideDocNode) docPeptides3.FindNode(pathBefore)).Peptide.Sequence);
    Assert.AreEqual(0, docPeptides3.FindNodeIndex(pathFirstPep));

    // 4. At a peptide in a peptide list
    pathSel = docPeptides2.GetPathTo((int) SrmDocument.Level.Molecules, 0);
    docPeptides3 = docPeptides2.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES2),
        true, pathSel, out pathSel);
    AssertEx.IsDocumentState(docPeptides3, 4, 4, 140, 448);
    Assert.AreSame(docPeptides2.Children[0].Id, docPeptides3.Children[0].Id);
    Assert.AreNotSame(docPeptides2.Children[0], docPeptides3.Children[0]);
    Assert.AreEqual(0, docPeptides3.FindNodeIndex(pathSel));
    Assert.AreEqual(16, docPeptides3.FindNodeIndex(pathFirstPep));

    // 5. Inside a peptide in a peptide list
    pathSel = docPeptides2.GetPathTo((int) SrmDocument.Level.Transitions, 0);
    docPeptides3 = docPeptides2.ImportFasta(new StringReader(TEXT_BOVINE_PEPTIDES2),
        true, pathSel, out pathSel);
    AssertEx.IsDocumentState(docPeptides3, 4, 4, 140, 448);
    Assert.AreSame(docPeptides2.Children[0].Id, docPeptides3.Children[0].Id);
    Assert.AreNotSame(docPeptides2.Children[0], docPeptides3.Children[0]);
    Assert.AreEqual(1, docPeptides3.FindNodeIndex(pathSel));
    Assert.AreEqual(0, docPeptides3.FindNodeIndex(pathFirstPep));
}
/// <summary>
/// Adds the peptides matched in the library to <paramref name="document"/>.
/// Resets <c>SkippedPeptideCount</c>, collects one match per sequence key, and then calls
/// <c>UpdateExistingPeptides</c>, <c>AddProteomePeptides</c> (only when a background proteome
/// is set) and <c>AddPeptidesToLibraryGroup</c> (only for matches without proteins).
/// </summary>
/// <param name="document">Document to add to; its peptide modification settings are passed to <c>EnsureMods</c>.</param>
/// <param name="broker">Long-wait broker handed through to the helper methods.</param>
/// <param name="toPath">
/// Path at which to add. Treated as null when it is a molecule-group-level path whose
/// identity is <c>SequenceTree.NODE_INSERT_ID</c>.
/// </param>
/// <param name="selectedPath">
/// Receives the path reported by the last helper that ran. When more than 1000 peptides
/// were added as a peptide list, the parent of that path is reported instead.
/// </param>
/// <returns>The document produced by the helper calls.</returns>
public SrmDocument AddPeptides(SrmDocument document, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    int groupLevel = (int)SrmDocument.Level.MoleculeGroups;
    bool isInsertNode = toPath != null
                        && toPath.Depth == groupLevel
                        && ReferenceEquals(toPath.GetIdentity(groupLevel), SequenceTree.NODE_INSERT_ID);
    if (isInsertNode)
    {
        toPath = null;
    }

    SkippedPeptideCount = 0;

    // Make heavy mods explicit
    bool anyExplicitHeavy = PeptideMatches.Values.Contains(
        match => match.NodePep.HasExplicitMods && match.NodePep.ExplicitMods.HeavyModifications != null);
    if (anyExplicitHeavy)
    {
        _matcher.ConvertAllHeavyModsExplicit();
    }

    // Default modification lists handed to EnsureMods for every peptide to be added.
    var defaultStaticMods = new MappedList<string, StaticMod>();
    defaultStaticMods.AddRange(Properties.Settings.Default.StaticModList);
    defaultStaticMods.AddRange(document.Settings.PeptideSettings.Modifications.StaticModifications);

    var defaultHeavyMods = new MappedList<string, StaticMod>();
    defaultHeavyMods.AddRange(Properties.Settings.Default.HeavyModList);
    defaultHeavyMods.AddRange(document.Settings.PeptideSettings.Modifications.AllHeavyModifications);

    // Keep only the first match seen for each sequence key.
    var uniqueMatches = new Dictionary<PeptideSequenceModKey, PeptideMatch>();
    foreach (var libraryKey in PeptideMatches.Keys)
    {
        var libraryMatch = PeptideMatches[libraryKey];
        var nodePep = libraryMatch.NodePep;
        if (_matcher.MatcherPepMods != null)
        {
            nodePep = libraryMatch.NodePep.EnsureMods(_matcher.MatcherPepMods,
                document.Settings.PeptideSettings.Modifications, defaultStaticMods, defaultHeavyMods);
        }

        if (uniqueMatches.ContainsKey(nodePep.SequenceKey))
        {
            continue;
        }
        uniqueMatches.Add(nodePep.SequenceKey,
            new PeptideMatch(nodePep, libraryMatch.Proteins, libraryMatch.MatchesFilterSettings));
    }

    if (!Properties.Settings.Default.LibraryPeptidesKeepFiltered)
    {
        // TODO: This removes entire peptides where only a single
        //       precursor does not match. e.g. the library contains
        //       a singly charged precursor match, but also doubly charged
        var passingMatches = new Dictionary<PeptideSequenceModKey, PeptideMatch>();
        foreach (var entry in uniqueMatches)
        {
            if (entry.Value.MatchesFilterSettings)
            {
                passingMatches.Add(entry.Key, entry.Value);
            }
        }
        uniqueMatches = passingMatches;
    }

    SrmDocument result = UpdateExistingPeptides(document, uniqueMatches, toPath, out selectedPath);
    toPath = selectedPath;

    // If there is an associated background proteome, add peptides that can be
    // matched to the proteins from the background proteome.
    if (_backgroundProteome != null)
    {
        result = AddProteomePeptides(result, uniqueMatches, broker, toPath, out selectedPath);
        toPath = selectedPath;
    }

    // Remaining peptides are added as a peptide list only when there is no background
    // proteome or the user asked for unmatched peptides to be added.
    if (_backgroundProteome != null && !Properties.Settings.Default.LibraryPeptidesAddUnmatched)
    {
        return result;
    }

    var unmatched = uniqueMatches.Values
        .Where(match => match.Proteins == null || match.Proteins.Count <= 0)
        .ToList();
    if (unmatched.Count == 0)
    {
        return result;
    }

    result = AddPeptidesToLibraryGroup(result, unmatched, broker, toPath, out selectedPath);
    if (unmatched.Count > 1000)
    {
        // Don't force Skyline to open a massive peptide list, if it wouldn't otherwise
        selectedPath = selectedPath.Parent;
    }
    return result;
}
/// <summary>
/// Adds the peptides matched in the library to <paramref name="document"/>.
/// Resets <c>SkippedPeptideCount</c>, builds a dictionary holding one match per sequence key,
/// and passes it to <c>UpdateExistingPeptides</c>, then to <c>AddProteomePeptides</c> when a
/// background proteome is set, and finally adds matches that have no proteins through
/// <c>AddPeptidesToLibraryGroup</c>.
/// </summary>
/// <param name="document">Document to add to; its peptide modification settings are passed to <c>EnsureMods</c>.</param>
/// <param name="broker">Long-wait broker handed through to the helper methods.</param>
/// <param name="toPath">
/// Path at which to add. Treated as null when it is a molecule-group-level path whose
/// identity is <c>SequenceTree.NODE_INSERT_ID</c>.
/// </param>
/// <param name="selectedPath">Receives the path reported by the last helper that ran.</param>
/// <returns>The document produced by the helper calls.</returns>
public SrmDocument AddPeptides(SrmDocument document, ILongWaitBroker broker, IdentityPath toPath, out IdentityPath selectedPath)
{
    if (toPath != null && toPath.Depth == (int)SrmDocument.Level.MoleculeGroups)
    {
        var groupId = toPath.GetIdentity((int)SrmDocument.Level.MoleculeGroups);
        if (ReferenceEquals(groupId, SequenceTree.NODE_INSERT_ID))
        {
            toPath = null;
        }
    }

    SkippedPeptideCount = 0;
    var matchesByKey = new Dictionary<PeptideSequenceModKey, PeptideMatch>();

    // Make heavy mods explicit
    if (PeptideMatches.Values.Contains(candidate =>
            candidate.NodePep.HasExplicitMods && candidate.NodePep.ExplicitMods.HeavyModifications != null))
    {
        _matcher.ConvertAllHeavyModsExplicit();
    }

    // Call ensure mods on all peptides to be added to the document.
    var staticDefaults = new MappedList<string, StaticMod>();
    staticDefaults.AddRange(Properties.Settings.Default.StaticModList);
    staticDefaults.AddRange(document.Settings.PeptideSettings.Modifications.StaticModifications);

    var heavyDefaults = new MappedList<string, StaticMod>();
    heavyDefaults.AddRange(Properties.Settings.Default.HeavyModList);
    heavyDefaults.AddRange(document.Settings.PeptideSettings.Modifications.HeavyModifications);

    foreach (var matchKey in PeptideMatches.Keys)
    {
        var source = PeptideMatches[matchKey];
        var nodePep = source.NodePep;
        if (_matcher.MatcherPepMods != null)
        {
            nodePep = source.NodePep.EnsureMods(
                _matcher.MatcherPepMods,
                document.Settings.PeptideSettings.Modifications,
                staticDefaults,
                heavyDefaults);
        }

        // A sequence key that is already present keeps its first match.
        bool alreadyPresent = matchesByKey.ContainsKey(nodePep.SequenceKey);
        if (!alreadyPresent)
        {
            var copy = new PeptideMatch(nodePep, source.Proteins, source.MatchesFilterSettings);
            matchesByKey.Add(nodePep.SequenceKey, copy);
        }
    }

    if (!Properties.Settings.Default.LibraryPeptidesKeepFiltered)
    {
        // TODO: This removes entire peptides where only a single
        //       precursor does not match. e.g. the library contains
        //       a singly charged precursor match, but also doubly charged
        var keptEntries = from entry in matchesByKey
                          where entry.Value.MatchesFilterSettings
                          select entry;
        matchesByKey = keptEntries.ToDictionary(entry => entry.Key, entry => entry.Value);
    }

    SrmDocument updated = UpdateExistingPeptides(document, matchesByKey, toPath, out selectedPath);
    toPath = selectedPath;

    // If there is an associated background proteome, add peptides that can be
    // matched to the proteins from the background proteome.
    if (_backgroundProteome != null)
    {
        updated = AddProteomePeptides(updated, matchesByKey, broker, toPath, out selectedPath);
    }
    toPath = selectedPath;

    // Add all remaining peptides as a peptide list.
    bool addUnmatched = _backgroundProteome == null || Properties.Settings.Default.LibraryPeptidesAddUnmatched;
    if (addUnmatched)
    {
        var withoutProteins = new List<PeptideMatch>();
        foreach (var candidate in matchesByKey.Values)
        {
            bool hasProteins = candidate.Proteins != null && candidate.Proteins.Count > 0;
            if (!hasProteins)
            {
                withoutProteins.Add(candidate);
            }
        }

        if (withoutProteins.Count > 0)
        {
            updated = AddPeptidesToLibraryGroup(updated, withoutProteins, broker, toPath, out selectedPath);
        }
    }

    return updated;
}
/// <summary>
/// Exercises <c>IdentityPath</c>: a single-identity path, the empty (root) path, and paths
/// that grow by one identity per iteration, checking parent/child access, depth and length,
/// equality, sub-path lookup, string form and out-of-range failures.
/// </summary>
public void IdentityPathTest()
{
    // A path built from a single identity sits directly beneath the root.
    var loneId = new SimpleIdentity();
    var singlePath = new IdentityPath(loneId);
    Assert.AreEqual(IdentityPath.ROOT, singlePath.Parent);
    Assert.AreEqual(IdentityPath.ROOT, singlePath.GetPathTo(-1));
    Assert.AreEqual(loneId, singlePath.Child);
    Assert.AreEqual(1, singlePath.Length);
    Assert.AreEqual(0, singlePath.Depth);
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => singlePath.GetPathTo(-2));
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => singlePath.GetPathTo(1));

    // A path built from no identities equals the root and has no child or parent.
    var emptyPath = new IdentityPath(new Identity[0]);
    Assert.AreEqual(IdentityPath.ROOT, emptyPath);
    Assert.AreEqual(0, emptyPath.Length);
    Assert.AreEqual(-1, emptyPath.Depth);
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => emptyPath.Child);
    AssertEx.ThrowsException<IndexOutOfRangeException>(() => emptyPath.Parent);

    // Grow two parallel identity lists and check the path built at each depth.
    const int levels = 5;
    var ids = new List<Identity>();
    var twinIds = new List<Identity>();
    var seenPaths = new HashSet<IdentityPath>();
    IdentityPath previous = IdentityPath.ROOT;
    for (int depth = 0; depth < levels; depth++)
    {
        ids.Add(new NumericIdentity(depth));
        twinIds.Add(new NumericIdentity(depth));

        var current = new IdentityPath(ids);
        Assert.AreEqual(current, new IdentityPath(ids));
        // The twin list holds separately constructed identities, so its path must differ.
        Assert.AreNotEqual(current, new IdentityPath(twinIds));
        Assert.AreEqual(previous, current.Parent);
        Assert.AreSame(ids[depth], current.Child);
        Assert.AreEqual(current, new IdentityPath(current.Parent, current.Child));
        Assert.AreEqual(depth, current.Depth);
        Assert.AreEqual(ids.Count, current.Length);
        Assert.AreSame(current.Child, current.GetIdentity(current.Depth));
        Assert.AreEqual(current.Parent, current.GetPathTo(current.Depth - 1));
        Assert.AreEqual("/" + ids.ToString("/"), current.ToString());

        // Every shallower sub-path must already be in the set from an earlier iteration.
        for (int ancestorDepth = 0; ancestorDepth < depth; ancestorDepth++)
        {
            Assert.IsTrue(seenPaths.Contains(current.GetPathTo(ancestorDepth)));
        }

        seenPaths.Add(current);
        previous = current;
    }
}