/// <summary>
/// Builds the sequence string of a proteoform group from the symbols of its residues.
/// </summary>
/// <param name="proteoform">The proteoform group whose residues are read; may be null.</param>
/// <returns>
/// A string made of each residue's <c>Symbol</c>, in residue order, or <c>null</c> when
/// <paramref name="proteoform"/> or its <c>Residues</c> collection is null.
/// </returns>
public static string? GetSequence(this IProteoformGroup proteoform)
{
    // A null group and a null residue collection are both reported as "no sequence".
    if (proteoform?.Residues is { } residues)
    {
        char[] symbols = residues.Select(residue => residue.Symbol).ToArray();

        return new string(symbols);
    }

    return null;
}
/// <summary>Generates a chemical proteoform hash for the specified ProForma string.</summary>
/// <param name="proForma">The ProForma string to parse and hash.</param>
/// <returns>
/// A chemical proteoform hash pairing the hash string with the proteoform group built from
/// the input. When the parsed term carries nothing but a sequence, the hash string is the
/// sequence itself; otherwise it is the ProForma string written from the rebuilt term.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when no amino acid sequence can be obtained from the created proteoform group.
/// </exception>
public IChemicalProteoformHash Generate(string proForma)
{
    // Parse string into term
    ProFormaTerm originalProFormaTerm = this._proFormaParser.ParseString(proForma);

    // Check to see if this is only a sequence
    bool isSequenceOnly =
        originalProFormaTerm.NTerminalDescriptors == null &&
        originalProFormaTerm.CTerminalDescriptors == null &&
        originalProFormaTerm.Tags == null &&
        originalProFormaTerm.GlobalModifications == null &&
        originalProFormaTerm.LabileDescriptors == null &&
        originalProFormaTerm.TagGroups == null &&
        originalProFormaTerm.UnlocalizedTags == null;

    if (isSequenceOnly)
    {
        // Nothing to rewrite: the sequence is used directly as the hash string.
        IProteoformGroup simpleGroup =
            this._proteoformGroupFactory.CreateProteoformGroup(originalProFormaTerm.Sequence);

        return new ChemicalProteoformHash(originalProFormaTerm.Sequence, simpleGroup);
    }

    // Create proteoform group (flattens all features into Ids)
    IProteoformGroup proteoformGroup = this._proteoformGroupFactory.CreateProteoformGroup(
        originalProFormaTerm,
        this._proteoformModificationLookup);

    // Terminal modifications: at most one descriptor per terminus; null when there is none.
    ProFormaDescriptor? nTermDescriptor = this.CreateDescriptor(proteoformGroup.NTerminalModification);
    IList<ProFormaDescriptor>? nTermDescriptors = nTermDescriptor == null ? null : new[] { nTermDescriptor };

    ProFormaDescriptor? cTermDescriptor = this.CreateDescriptor(proteoformGroup.CTerminalModification);
    IList<ProFormaDescriptor>? cTermDescriptors = cTermDescriptor == null ? null : new[] { cTermDescriptor };

    // Each list below stays null until its first entry is added, so absent features are
    // passed to the term as null rather than as empty lists.
    IList<ProFormaTag>? tags = null;
    IList<ProFormaDescriptor>? labileDescriptors = null;
    IList<ProFormaUnlocalizedTag>? unlocalizedTags = null;
    IList<ProFormaTagGroup>? tagGroups = null;
    IList<ProFormaGlobalModification>? globalModifications = null;

    // Localized modifications -> one single-descriptor tag per modification.
    if (proteoformGroup.LocalizedModifications?.Count > 0)
    {
        foreach (var mod in proteoformGroup.LocalizedModifications)
        {
            ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

            if (descriptor != null)
            {
                tags ??= new List<ProFormaTag>();
                tags.Add(new ProFormaTag(mod.ZeroBasedStartIndex, mod.ZeroBasedEndIndex, new[] { descriptor }));
            }
        }
    }

    // Unlocalized modifications -> labile descriptors or unlocalized tags.
    if (proteoformGroup.UnlocalizedModifications?.Count > 0)
    {
        foreach (var mod in proteoformGroup.UnlocalizedModifications)
        {
            ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

            if (descriptor != null)
            {
                if (mod.IsLabile)
                {
                    // Labile modifications are expanded: the descriptor is added once per occurrence.
                    labileDescriptors ??= new List<ProFormaDescriptor>();

                    for (int i = 0; i < mod.Count; i++)
                    {
                        labileDescriptors.Add(descriptor);
                    }
                }
                else
                {
                    // Non-labile modifications keep their count on a single unlocalized tag.
                    unlocalizedTags ??= new List<ProFormaUnlocalizedTag>();
                    unlocalizedTags.Add(new ProFormaUnlocalizedTag(mod.Count, new[] { descriptor }));
                }
            }
        }
    }

    // Modification groups -> tag groups carrying the members' positions and weights.
    if (proteoformGroup.ModificationGroups?.Count > 0)
    {
        foreach (var mod in proteoformGroup.ModificationGroups)
        {
            ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

            if (descriptor != null)
            {
                tagGroups ??= new List<ProFormaTagGroup>();
                tagGroups.Add(new ProFormaTagGroup(
                    mod.GroupName,
                    descriptor.Key,
                    descriptor.EvidenceType,
                    descriptor.Value,
                    mod.Members
                        .Select(x => new ProFormaMembershipDescriptor(x.ZeroBasedStartIndex, x.ZeroBasedEndIndex, x.Weight))
                        .ToList()));
            }
        }
    }

    // Global modifications -> one single-descriptor global modification each.
    if (proteoformGroup.GlobalModifications?.Count > 0)
    {
        foreach (var mod in proteoformGroup.GlobalModifications)
        {
            ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

            if (descriptor != null)
            {
                globalModifications ??= new List<ProFormaGlobalModification>();
                globalModifications.Add(new ProFormaGlobalModification(new[] { descriptor }, mod.TargetAminoAcids));
            }
        }
    }

    string? sequence = proteoformGroup.GetSequence();

    if (sequence == null)
    {
        // InvalidOperationException derives from Exception, so existing catch (Exception)
        // handlers keep working while the failure becomes more specific.
        throw new InvalidOperationException("Cannot get amino acid sequence for the proteoform group.");
    }

    // Order tags with an ordinal comparison: the default string comparer is culture-sensitive,
    // which would let the same proteoform produce different hash strings depending on the
    // machine's culture settings. OrderBy is stable, so tags with equal values keep their
    // original relative order.
    ProFormaTag[]? orderedTags = tags?
        .OrderBy(t => t.Descriptors.First().Value, StringComparer.Ordinal)
        .ToArray();

    ProFormaTerm proFormaTerm = new(
        sequence,
        tags: orderedTags,
        nTerminalDescriptors: nTermDescriptors,
        cTerminalDescriptors: cTermDescriptors,
        labileDescriptors: labileDescriptors,
        unlocalizedTags: unlocalizedTags,
        tagGroups: tagGroups,
        globalModifications: globalModifications);

    string hash = new ProFormaWriter().WriteString(proFormaTerm);

    return new ChemicalProteoformHash(hash, proteoformGroup);
}