Beispiel #1
0
        /// <summary>
        /// Builds the sequence string of a proteoform group from the symbols of its residues.
        /// </summary>
        /// <param name="proteoform">The proteoform group whose residues are read.</param>
        /// <returns>
        /// A string made of each residue's symbol in enumeration order, or null when the group
        /// or its residue collection is null.
        /// </returns>
        public static string? GetSequence(this IProteoformGroup proteoform)
        {
            // The null-conditional covers both a null group and a null residue collection.
            var residues = proteoform?.Residues;

            if (residues == null)
            {
                return null;
            }

            char[] symbols = residues.Select(residue => residue.Symbol).ToArray();

            return new string(symbols);
        }
Beispiel #2
0
        /// <summary>
        /// Generates a chemical proteoform hash for the specified ProForma string.
        /// </summary>
        /// <remarks>
        /// The string is parsed into a <see cref="ProFormaTerm"/>. When every descriptor, tag and
        /// modification member of the parsed term is null, the term's sequence is used directly as
        /// the hash. Otherwise a proteoform group is created from the term, each of its
        /// modifications is converted back into a descriptor through <c>CreateDescriptor</c>, and
        /// the rebuilt term is written out with <see cref="ProFormaWriter"/> to produce the hash.
        /// Modifications for which <c>CreateDescriptor</c> returns null are left out.
        /// </remarks>
        /// <param name="proForma">The ProForma string to parse and hash.</param>
        /// <returns>The hash paired with the proteoform group it was generated from.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when no amino acid sequence can be obtained from the proteoform group.
        /// </exception>
        public IChemicalProteoformHash Generate(string proForma)
        {
            // Parse string into term
            ProFormaTerm originalProFormaTerm = this._proFormaParser.ParseString(proForma);

            // Check to see if this is only a sequence: nothing to normalize, the sequence is the hash.
            bool isSequenceOnly =
                originalProFormaTerm.NTerminalDescriptors == null &&
                originalProFormaTerm.CTerminalDescriptors == null &&
                originalProFormaTerm.Tags == null &&
                originalProFormaTerm.GlobalModifications == null &&
                originalProFormaTerm.LabileDescriptors == null &&
                originalProFormaTerm.TagGroups == null &&
                originalProFormaTerm.UnlocalizedTags == null;

            if (isSequenceOnly)
            {
                IProteoformGroup simpleGroup = this._proteoformGroupFactory.CreateProteoformGroup(originalProFormaTerm.Sequence);

                return new ChemicalProteoformHash(originalProFormaTerm.Sequence, simpleGroup);
            }

            // Create proteoform group (flattens all features into Ids)
            IProteoformGroup proteoformGroup = this._proteoformGroupFactory.CreateProteoformGroup(
                originalProFormaTerm,
                this._proteoformModificationLookup);

            // Terminal modifications become single-element descriptor lists, or null when absent.
            ProFormaDescriptor? nTermDescriptor = this.CreateDescriptor(proteoformGroup.NTerminalModification);
            IList<ProFormaDescriptor>? nTermDescriptors = nTermDescriptor == null ? null : new[] { nTermDescriptor };

            ProFormaDescriptor? cTermDescriptor = this.CreateDescriptor(proteoformGroup.CTerminalModification);
            IList<ProFormaDescriptor>? cTermDescriptors = cTermDescriptor == null ? null : new[] { cTermDescriptor };

            // Each list stays null until its first entry is added, so categories with no
            // entries are passed to the term as null rather than as empty lists.
            IList<ProFormaTag>? tags = null;
            IList<ProFormaDescriptor>? labileDescriptors = null;
            IList<ProFormaUnlocalizedTag>? unlocalizedTags = null;
            IList<ProFormaTagGroup>? tagGroups = null;
            IList<ProFormaGlobalModification>? globalModifications = null;

            if (proteoformGroup.LocalizedModifications?.Count > 0)
            {
                foreach (var mod in proteoformGroup.LocalizedModifications)
                {
                    ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

                    if (descriptor == null)
                    {
                        continue;
                    }

                    tags ??= new List<ProFormaTag>();
                    tags.Add(new ProFormaTag(mod.ZeroBasedStartIndex, mod.ZeroBasedEndIndex, new[] { descriptor }));
                }
            }

            if (proteoformGroup.UnlocalizedModifications?.Count > 0)
            {
                foreach (var mod in proteoformGroup.UnlocalizedModifications)
                {
                    ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

                    if (descriptor == null)
                    {
                        continue;
                    }

                    if (mod.IsLabile)
                    {
                        labileDescriptors ??= new List<ProFormaDescriptor>();

                        // Labile modifications carry no count of their own here, so the
                        // descriptor is repeated once per occurrence.
                        for (int i = 0; i < mod.Count; i++)
                        {
                            labileDescriptors.Add(descriptor);
                        }
                    }
                    else
                    {
                        unlocalizedTags ??= new List<ProFormaUnlocalizedTag>();
                        unlocalizedTags.Add(new ProFormaUnlocalizedTag(mod.Count, new[] { descriptor }));
                    }
                }
            }

            if (proteoformGroup.ModificationGroups?.Count > 0)
            {
                foreach (var mod in proteoformGroup.ModificationGroups)
                {
                    ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

                    if (descriptor == null)
                    {
                        continue;
                    }

                    var members = mod.Members
                        .Select(x => new ProFormaMembershipDescriptor(x.ZeroBasedStartIndex, x.ZeroBasedEndIndex, x.Weight))
                        .ToList();

                    tagGroups ??= new List<ProFormaTagGroup>();
                    tagGroups.Add(new ProFormaTagGroup(mod.GroupName, descriptor.Key, descriptor.EvidenceType, descriptor.Value, members));
                }
            }

            if (proteoformGroup.GlobalModifications?.Count > 0)
            {
                foreach (var mod in proteoformGroup.GlobalModifications)
                {
                    ProFormaDescriptor? descriptor = this.CreateDescriptor(mod.ModificationDelta);

                    if (descriptor == null)
                    {
                        continue;
                    }

                    globalModifications ??= new List<ProFormaGlobalModification>();
                    globalModifications.Add(new ProFormaGlobalModification(new[] { descriptor }, mod.TargetAminoAcids));
                }
            }

            string? sequence = proteoformGroup.GetSequence();

            if (sequence == null)
            {
                // A specific exception type instead of the base Exception; it is still caught by
                // any existing catch (Exception) handler.
                throw new InvalidOperationException("Cannot get amino acid sequence for the proteoform group.");
            }

            // Tags are ordered by the value of their first descriptor; every tag built above has
            // exactly one descriptor.
            // NOTE(review): OrderBy uses the default comparer. If Value is a string that comparison
            // is culture-sensitive — confirm whether an ordinal comparer is wanted for a stable hash.
            ProFormaTerm proFormaTerm = new(
                sequence,
                tags: tags?.OrderBy(t => t.Descriptors.First().Value).ToArray(),
                nTerminalDescriptors: nTermDescriptors,
                cTerminalDescriptors: cTermDescriptors,
                labileDescriptors: labileDescriptors,
                unlocalizedTags: unlocalizedTags,
                tagGroups: tagGroups,
                globalModifications: globalModifications);

            string hash = new ProFormaWriter().WriteString(proFormaTerm);

            return new ChemicalProteoformHash(hash, proteoformGroup);
        }