Esempio n. 1
0
        /// <summary>
        /// Splits a reaction into one new reaction per reactant/product pair that is
        /// linked through the supplied atom map.
        /// </summary>
        /// <remarks>
        /// A reactant/product pair is retained when at least one atom of the reactant is a key
        /// of <paramref name="map"/> and the atom it maps to is among the product's atoms.
        /// When <paramref name="map"/> is empty, every reactant/product combination is retained.
        /// Each new reaction receives the result of
        /// <c>AtomContainerManipulator.CopyAndSuppressedHydrogens</c> for the reactant and for the
        /// product, and its <c>CDKPropertyName.SMILES</c> property is set from <c>sg.Create</c>.
        /// </remarks>
        /// <param name="rxn">reaction whose reactants and products are paired up</param>
        /// <param name="map">atom map; looked up by reactant atom, the value is searched for in the product</param>
        /// <returns>list with one new reaction per retained reactant/product pair</returns>
        public static List <IReaction> SplitReaction(IReaction rxn, IDictionary <IAtom, IAtom> map)
        {
            // every reactant combined with every product
            var rctPrdPairs = from rct in rxn.Reactants
                              from prd in rxn.Products
                              select new { rct, prd };

            // An empty map keeps all pairs. Otherwise a pair is kept if any reactant atom
            // maps onto an atom of the product. TryGetValue avoids the second dictionary
            // lookup that ContainsKey + indexer would perform.
            var mappedPairs = rctPrdPairs.Where(pair =>
                map.Count == 0 ||
                pair.rct.Atoms.Any(rctAtom =>
                    map.TryGetValue(rctAtom, out var mappedAtom) &&
                    pair.prd.Atoms.Contains(mappedAtom)));

            return mappedPairs.Select(mp =>
            {
                var newRxn = ChemObjectBuilder.Instance.NewReaction();
                newRxn.Reactants.Add(AtomContainerManipulator.CopyAndSuppressedHydrogens(mp.rct));
                newRxn.Products.Add(AtomContainerManipulator.CopyAndSuppressedHydrogens(mp.prd));
                // the SMILES is generated after both sides have been added
                newRxn.SetProperty(CDKPropertyName.SMILES, sg.Create(newRxn));
                return newRxn;
            }).ToList();
        }
Esempio n. 2
0
        /// <summary>
        /// Find all enabled abbreviations in the provided molecule. They are not
        /// added to the existing Sgroups and may need filtering.
        /// </summary>
        /// <remarks>
        /// The method works in three stages:
        /// <list type="number">
        /// <item>If no atom belongs to an existing Sgroup, the whole molecule (or its
        /// disconnected parts) is looked up in <c>disconnectedAbbreviations</c>. When
        /// <c>ContractToSingleLabel</c> is set and a match is found, a single Sgroup is
        /// returned immediately.</item>
        /// <item>Every fragment from <c>GenerateFragments</c> is looked up in
        /// <c>connectedAbbreviations</c>; matches that do not overlap already used atoms
        /// become new abbreviation Sgroups.</item>
        /// <item>When <c>ContractOnHetero</c> is set, eligible non-carbon atoms are merged
        /// with their adjacent abbreviations and terminal neighbours into one larger
        /// abbreviation.</item>
        /// </list>
        /// Neighbour symbols are ordered and compared ordinally so the generated label does
        /// not depend on the current thread culture.
        /// </remarks>
        /// <param name="mol">molecule</param>
        /// <returns>list of new abbreviation Sgroups</returns>
        public IList <Sgroup> Generate(IAtomContainer mol)
        {
            // mark which atoms have already been abbreviated or are
            // part of an existing Sgroup
            var usedAtoms = new HashSet <IAtom>();
            var sgroups   = mol.GetCtabSgroups();

            if (sgroups != null)
            {
                foreach (var sgroup in sgroups)
                {
                    foreach (var atom in sgroup.Atoms)
                    {
                        usedAtoms.Add(atom);
                    }
                }
            }

            var newSgroups = new List <Sgroup>();

            // disconnected abbreviations, salts, common reagents, large compounds
            if (!usedAtoms.Any())
            {
                try
                {
                    var    copy   = AtomContainerManipulator.CopyAndSuppressedHydrogens(mol);
                    string cansmi = usmigen.Create(copy);
                    if (disconnectedAbbreviations.TryGetValue(cansmi, out string label) && !disabled.Contains(label) && ContractToSingleLabel)
                    {
                        // the whole molecule is one known abbreviation
                        var sgroup = new Sgroup
                        {
                            Type      = SgroupType.CtabAbbreviation,
                            Subscript = label
                        };
                        foreach (var atom in mol.Atoms)
                        {
                            sgroup.Atoms.Add(atom);
                        }
                        return(new[] { sgroup });
                    }
                    else if (cansmi.Contains("."))
                    {
                        // a '.' in the SMILES means there is more than one component
                        var parts = ConnectivityChecker.PartitionIntoMolecules(mol);

                        // leave one out: 'a' is part i, 'b' is everything else
                        Sgroup best = null;
                        for (int i = 0; i < parts.Count; i++)
                        {
                            var a = parts[i];
                            var b = a.Builder.NewAtomContainer();
                            for (int j = 0; j < parts.Count; j++)
                            {
                                if (j != i)
                                {
                                    b.Add(parts[j]);
                                }
                            }
                            var sgroup1 = GetAbbr(a);
                            var sgroup2 = GetAbbr(b);
                            if (sgroup1 != null && sgroup2 != null && ContractToSingleLabel)
                            {
                                // both halves are abbreviations: join them into one label,
                                // longer subscript first
                                var combined = new Sgroup();
                                foreach (var atom in sgroup1.Atoms)
                                {
                                    combined.Atoms.Add(atom);
                                }
                                foreach (var atom in sgroup2.Atoms)
                                {
                                    combined.Atoms.Add(atom);
                                }
                                if (sgroup1.Subscript.Length > sgroup2.Subscript.Length)
                                {
                                    combined.Subscript = sgroup1.Subscript + String_Interpunct + sgroup2.Subscript;
                                }
                                else
                                {
                                    combined.Subscript = sgroup2.Subscript + String_Interpunct + sgroup1.Subscript;
                                }
                                combined.Type = SgroupType.CtabAbbreviation;
                                return(new[] { combined });
                            }
                            if (sgroup1 != null && (best == null || sgroup1.Atoms.Count > best.Atoms.Count))
                            {
                                best = sgroup1;
                            }
                            // NOTE(review): sgroup2 replaces 'best' when it has FEWER atoms, while
                            // sgroup1 replaces it when it has MORE - confirm this asymmetry is intended.
                            if (sgroup2 != null && (best == null || sgroup2.Atoms.Count < best.Atoms.Count))
                            {
                                best = sgroup2;
                            }
                        }
                        if (best != null)
                        {
                            newSgroups.Add(best);
                            foreach (var atom in best.Atoms)
                            {
                                usedAtoms.Add(atom);
                            }
                        }
                    }
                }
                catch (CDKException)
                {
                    // deliberately ignored: a failure while probing the disconnected
                    // abbreviations only skips this stage, the connected fragments
                    // below are still processed
                }
            }

            var fragments  = GenerateFragments(mol);
            // abbreviation Sgroups keyed by the atom they are attached to (outside the Sgroup)
            var sgroupAdjs = new MultiDictionary <IAtom, Sgroup>();

            foreach (var frag in fragments)
            {
                try
                {
                    var smi = usmigen.Create(AtomContainerManipulator.CopyAndSuppressedHydrogens(frag));
                    if (!connectedAbbreviations.TryGetValue(smi, out string label) || disabled.Contains(label))
                    {
                        continue;
                    }

                    bool overlap = false;

                    // note: first atom is '*'
                    var numAtoms = frag.Atoms.Count;
                    for (int i = 1; i < numAtoms; i++)
                    {
                        if (usedAtoms.Contains(frag.Atoms[i]))
                        {
                            overlap = true;
                            break;
                        }
                    }

                    // overlaps with previous assignment
                    if (overlap)
                    {
                        continue;
                    }

                    // create new abbreviation Sgroup
                    var sgroup = new Sgroup
                    {
                        Type      = SgroupType.CtabAbbreviation,
                        Subscript = label
                    };

                    // the fragment's first bond carries the bond that was cut in the molecule
                    var   attachBond = frag.Bonds[0].GetProperty <IBond>(PropertyName_CutBond);
                    IAtom attachAtom = null;
                    sgroup.Bonds.Add(attachBond);
                    for (int i = 1; i < numAtoms; i++)
                    {
                        var atom = frag.Atoms[i];
                        usedAtoms.Add(atom);
                        sgroup.Atoms.Add(atom);
                        // the attachment atom is the end of the cut bond that is NOT in the fragment
                        if (attachBond.Begin.Equals(atom))
                        {
                            attachAtom = attachBond.End;
                        }
                        else if (attachBond.End.Equals(atom))
                        {
                            attachAtom = attachBond.Begin;
                        }
                    }

                    if (attachAtom != null)
                    {
                        sgroupAdjs.Add(attachAtom, sgroup);
                    }
                    newSgroups.Add(sgroup);
                }
                catch (CDKException)
                {
                    // ignore
                }
            }

            if (!ContractOnHetero)
            {
                return(newSgroups);
            }

            // now collapse
            foreach (var attach in mol.Atoms)
            {
                if (usedAtoms.Contains(attach))
                {
                    continue;
                }

                // skip charged or isotopic labelled, C or R/*, H, He
                if ((attach.FormalCharge != null && attach.FormalCharge != 0) ||
                    attach.MassNumber != null ||
                    attach.AtomicNumber == 6 ||
                    attach.AtomicNumber < 2)
                {
                    continue;
                }

                var hcount   = attach.ImplicitHydrogenCount.Value;
                var xatoms   = new HashSet <IAtom>();   // atoms of the contracted group
                var xbonds   = new HashSet <IBond>();   // bonds to adjacent abbreviations (absorbed)
                var newbonds = new HashSet <IBond>();   // bonds that remain crossing the new group
                xatoms.Add(attach);

                var nbrSymbols = new List <string>();
                var todelete   = new HashSet <Sgroup>();
                foreach (var sgroup in sgroupAdjs[attach])
                {
                    if (ContainsChargeChar(sgroup.Subscript))
                    {
                        continue;
                    }
                    if (sgroup.Bonds.Count != 1)
                    {
                        continue;
                    }
                    var xbond = sgroup.Bonds.First();
                    xbonds.Add(xbond);
                    foreach (var a in sgroup.Atoms)
                    {
                        xatoms.Add(a);
                    }
                    // Abandon this attach atom when its one-letter symbol plus the subscript's
                    // leading lower-case letter resolves to something other than ChemicalElement.R
                    // (presumably because the label would read as another element - confirm).
                    if (attach.Symbol.Length == 1 &&
                        char.IsLower(sgroup.Subscript[0]))
                    {
                        if (ChemicalElement.OfSymbol(attach.Symbol + sgroup.Subscript[0]) != ChemicalElement.R)
                        {
                            goto continue_collapse;
                        }
                    }
                    nbrSymbols.Add(sgroup.Subscript);
                    todelete.Add(sgroup);
                }
                foreach (var bond in mol.GetConnectedBonds(attach))
                {
                    if (!xbonds.Contains(bond))
                    {
                        var nbr = bond.GetOther(attach);
                        // contract terminal bonds
                        if (mol.GetConnectedBonds(nbr).Count() == 1)
                        {
                            if (nbr.MassNumber != null ||
                                (nbr.FormalCharge != null && nbr.FormalCharge != 0))
                            {
                                // isotopic or charged terminal atoms stay outside the group
                                newbonds.Add(bond);
                            }
                            else if (nbr.AtomicNumber == 1)
                            {
                                // explicit hydrogen is folded into the hydrogen count
                                hcount++;
                                xatoms.Add(nbr);
                            }
                            else if (nbr.AtomicNumber > 0)
                            {
                                nbrSymbols.Add(NewSymbol(nbr.AtomicNumber, nbr.ImplicitHydrogenCount.Value, false));
                                xatoms.Add(nbr);
                            }
                        }
                        else
                        {
                            newbonds.Add(bond);
                        }
                    }
                }

                // reject if no symbols
                // reject if no bonds (<1), except if all symbols are identical... (HashSet.size==1)
                // reject if more that 2 bonds
                if (!nbrSymbols.Any() ||
                    newbonds.Count < 1 && (new HashSet <string>(nbrSymbols).Count != 1) ||
                    newbonds.Count > 2)
                {
                    continue;
                }

                // create the symbol
                var sb = new StringBuilder();
                sb.Append(NewSymbol(attach.AtomicNumber, hcount, newbonds.Count == 0));
                string prev  = null;
                int    count = 0;
                // shortest symbols first, ties broken ordinally so the label is
                // independent of the current culture
                nbrSymbols.Sort((o1, o2) =>
                {
                    int cmp = o1.Length.CompareTo(o2.Length);
                    if (cmp != 0)
                    {
                        return(cmp);
                    }
                    return(string.CompareOrdinal(o1, o2));
                });
                // run-length encode equal neighbouring symbols
                foreach (string nbrSymbol in nbrSymbols)
                {
                    if (nbrSymbol.Equals(prev))
                    {
                        count++;
                    }
                    else
                    {
                        // count == 0 only on the first iteration, where prev is still null;
                        // the short-circuit keeps CountUpper from being called with null
                        bool useParen = count == 0 || CountUpper(prev) > 1 || (prev != null && nbrSymbol.StartsWith(prev, System.StringComparison.Ordinal));
                        AppendGroup(sb, prev, count, useParen);
                        prev  = nbrSymbol;
                        count = 1;
                    }
                }
                AppendGroup(sb, prev, count, false);

                // remove existing
                foreach (var e in todelete)
                {
                    newSgroups.Remove(e);
                }

                // create new
                var newSgroup = new Sgroup
                {
                    Type      = SgroupType.CtabAbbreviation,
                    Subscript = sb.ToString()
                };
                foreach (var bond in newbonds)
                {
                    newSgroup.Bonds.Add(bond);
                }
                foreach (var atom in xatoms)
                {
                    newSgroup.Atoms.Add(atom);
                }

                newSgroups.Add(newSgroup);
                foreach (var a in xatoms)
                {
                    usedAtoms.Add(a);
                }
continue_collapse:
                ;
            }

            return(newSgroups);
        }