/// <summary>
/// Splits a reaction into single-reactant/single-product reactions, one for
/// every reactant/product pair that is linked through the supplied atom map.
/// </summary>
/// <remarks>
/// A pair is kept when at least one reactant atom is a key of <paramref name="map"/>
/// whose mapped value is an atom of the product. When the map is empty every
/// reactant/product combination is kept. Each new reaction holds hydrogen-suppressed
/// copies of the two molecules and stores the string produced by <c>sg.Create</c>
/// under <see cref="CDKPropertyName.SMILES"/>.
/// </remarks>
/// <param name="rxn">reaction whose reactants and products are paired up</param>
/// <param name="map">atom map looked up by reactant atom; the value is searched for among the product's atoms</param>
/// <returns>the new reactions, ordered by reactant and then by product</returns>
public static List<IReaction> SplitReaction(IReaction rxn, IDictionary<IAtom, IAtom> map)
{
    return rxn.Reactants
        // every reactant is combined with every product
        .SelectMany(rct => rxn.Products, (rct, prd) => new { rct, prd })
        // an empty map carries no pairing information, so nothing is filtered out
        .Where(pair => map.Count == 0
            || pair.rct.Atoms.Any(rctAtom =>
                map.TryGetValue(rctAtom, out var prdAtom) && pair.prd.Atoms.Contains(prdAtom)))
        .Select(pair =>
        {
            var newRxn = ChemObjectBuilder.Instance.NewReaction();
            newRxn.Reactants.Add(AtomContainerManipulator.CopyAndSuppressedHydrogens(pair.rct));
            newRxn.Products.Add(AtomContainerManipulator.CopyAndSuppressedHydrogens(pair.prd));
            newRxn.SetProperty(CDKPropertyName.SMILES, sg.Create(newRxn));
            return newRxn;
        })
        .ToList();
}
/// <summary>
/// Find all enabled abbreviations in the provided molecule. They are not
/// added to the existing Sgroups and may need filtering.
/// </summary>
/// <remarks>
/// The method works in three stages:
/// <list type="number">
/// <item><description>If no atom belongs to an existing Sgroup, the whole molecule (or a
/// "leave one out" split of its disconnected parts) is looked up in the disconnected
/// abbreviations. When <c>ContractToSingleLabel</c> is set and a full match is found,
/// a single Sgroup is returned immediately.</description></item>
/// <item><description>Every fragment from <c>GenerateFragments</c> is looked up in the connected
/// abbreviations; fragments that touch an already used atom are skipped.</description></item>
/// <item><description>If <c>ContractOnHetero</c> is set, eligible hetero atoms are merged with
/// their adjacent abbreviations and terminal neighbours into one combined label.</description></item>
/// </list>
/// A <c>CDKException</c> raised while processing stage one or a single fragment is
/// swallowed and that piece is simply not abbreviated.
/// </remarks>
/// <param name="mol">molecule</param>
/// <returns>list of new abbreviation Sgroups</returns>
public IList<Sgroup> Generate(IAtomContainer mol)
{
    // mark which atoms have already been abbreviated or are
    // part of an existing Sgroup
    var usedAtoms = new HashSet<IAtom>();
    var sgroups = mol.GetCtabSgroups();
    if (sgroups != null)
    {
        foreach (var sgroup in sgroups)
        {
            foreach (var atom in sgroup.Atoms)
            {
                usedAtoms.Add(atom);
            }
        }
    }

    var newSgroups = new List<Sgroup>();

    // disconnected abbreviations, salts, common reagents, large compounds
    if (!usedAtoms.Any())
    {
        try
        {
            var copy = AtomContainerManipulator.CopyAndSuppressedHydrogens(mol);
            string cansmi = usmigen.Create(copy);
            if (disconnectedAbbreviations.TryGetValue(cansmi, out string label)
                && !disabled.Contains(label)
                && ContractToSingleLabel)
            {
                // the whole molecule collapses to one label
                var sgroup = new Sgroup { Type = SgroupType.CtabAbbreviation, Subscript = label };
                foreach (var atom in mol.Atoms)
                {
                    sgroup.Atoms.Add(atom);
                }
                return new[] { sgroup };
            }
            else if (cansmi.Contains("."))
            {
                var parts = ConnectivityChecker.PartitionIntoMolecules(mol);

                // leave one out: try each part against the union of all the others
                Sgroup best = null;
                for (int i = 0; i < parts.Count; i++)
                {
                    var a = parts[i];
                    var b = a.Builder.NewAtomContainer();
                    for (int j = 0; j < parts.Count; j++)
                    {
                        if (j != i)
                        {
                            b.Add(parts[j]);
                        }
                    }

                    var sgroup1 = GetAbbr(a);
                    var sgroup2 = GetAbbr(b);
                    if (sgroup1 != null && sgroup2 != null && ContractToSingleLabel)
                    {
                        // both halves are known: join their labels, longer label first
                        var combined = new Sgroup();
                        foreach (var atom in sgroup1.Atoms)
                        {
                            combined.Atoms.Add(atom);
                        }
                        foreach (var atom in sgroup2.Atoms)
                        {
                            combined.Atoms.Add(atom);
                        }
                        if (sgroup1.Subscript.Length > sgroup2.Subscript.Length)
                        {
                            combined.Subscript = sgroup1.Subscript + String_Interpunct + sgroup2.Subscript;
                        }
                        else
                        {
                            combined.Subscript = sgroup2.Subscript + String_Interpunct + sgroup1.Subscript;
                        }
                        combined.Type = SgroupType.CtabAbbreviation;
                        return new[] { combined };
                    }

                    // NOTE(review): sgroup1 replaces 'best' when it covers MORE atoms,
                    // sgroup2 when it covers FEWER - confirm this asymmetry is intended.
                    if (sgroup1 != null && (best == null || sgroup1.Atoms.Count > best.Atoms.Count))
                    {
                        best = sgroup1;
                    }
                    if (sgroup2 != null && (best == null || sgroup2.Atoms.Count < best.Atoms.Count))
                    {
                        best = sgroup2;
                    }
                }

                if (best != null)
                {
                    newSgroups.Add(best);
                    foreach (var atom in best.Atoms)
                    {
                        usedAtoms.Add(atom);
                    }
                }
            }
        }
        catch (CDKException)
        {
            // best effort: if this stage fails, carry on with the fragment based search
        }
    }

    var fragments = GenerateFragments(mol);
    var sgroupAdjs = new MultiDictionary<IAtom, Sgroup>();

    foreach (var frag in fragments)
    {
        try
        {
            var smi = usmigen.Create(AtomContainerManipulator.CopyAndSuppressedHydrogens(frag));
            if (!connectedAbbreviations.TryGetValue(smi, out string label) || disabled.Contains(label))
            {
                continue;
            }

            // note: first atom is '*', so real fragment atoms start at index 1
            var numAtoms = frag.Atoms.Count;
            bool overlap = false;
            for (int i = 1; i < numAtoms; i++)
            {
                if (usedAtoms.Contains(frag.Atoms[i]))
                {
                    overlap = true;
                    break;
                }
            }

            // overlaps with previous assignment
            if (overlap)
            {
                continue;
            }

            // create new abbreviation Sgroup
            var sgroup = new Sgroup { Type = SgroupType.CtabAbbreviation, Subscript = label };
            var attachBond = frag.Bonds[0].GetProperty<IBond>(PropertyName_CutBond);
            IAtom attachAtom = null;
            sgroup.Bonds.Add(attachBond);
            for (int i = 1; i < numAtoms; i++)
            {
                var atom = frag.Atoms[i];
                usedAtoms.Add(atom);
                sgroup.Atoms.Add(atom);
                // the attachment atom is the end of the cut bond that lies outside the fragment
                if (attachBond.Begin.Equals(atom))
                {
                    attachAtom = attachBond.End;
                }
                else if (attachBond.End.Equals(atom))
                {
                    attachAtom = attachBond.Begin;
                }
            }

            if (attachAtom != null)
            {
                sgroupAdjs.Add(attachAtom, sgroup);
            }
            newSgroups.Add(sgroup);
        }
        catch (CDKException)
        {
            // ignore: this fragment is simply not abbreviated
        }
    }

    if (!ContractOnHetero)
    {
        return newSgroups;
    }

    // now collapse
    foreach (var attach in mol.Atoms)
    {
        if (usedAtoms.Contains(attach))
        {
            continue;
        }

        // skip charged or isotopic labelled, C or R/*, H, He
        if ((attach.FormalCharge != null && attach.FormalCharge != 0)
            || attach.MassNumber != null
            || attach.AtomicNumber == 6
            || attach.AtomicNumber < 2)
        {
            continue;
        }

        // assumes the implicit hydrogen count of the atom has been set
        var hcount = attach.ImplicitHydrogenCount.Value;
        var xatoms = new HashSet<IAtom>();
        var xbonds = new HashSet<IBond>();
        var newbonds = new HashSet<IBond>();
        xatoms.Add(attach);

        var nbrSymbols = new List<string>();
        var todelete = new HashSet<Sgroup>();

        // set when "attach symbol + first letter of a label" would read as another element
        bool ambiguousSymbol = false;
        foreach (var sgroup in sgroupAdjs[attach])
        {
            if (ContainsChargeChar(sgroup.Subscript))
            {
                continue;
            }
            if (sgroup.Bonds.Count != 1)
            {
                continue;
            }

            var xbond = sgroup.Bonds.First();
            xbonds.Add(xbond);
            foreach (var a in sgroup.Atoms)
            {
                xatoms.Add(a);
            }

            if (attach.Symbol.Length == 1 && char.IsLower(sgroup.Subscript[0]))
            {
                if (ChemicalElement.OfSymbol(attach.Symbol + sgroup.Subscript[0]) != ChemicalElement.R)
                {
                    ambiguousSymbol = true;
                    break;
                }
            }

            nbrSymbols.Add(sgroup.Subscript);
            todelete.Add(sgroup);
        }

        // abandon this attachment atom entirely; nothing collected so far is kept
        if (ambiguousSymbol)
        {
            continue;
        }

        foreach (var bond in mol.GetConnectedBonds(attach))
        {
            if (xbonds.Contains(bond))
            {
                continue;
            }

            var nbr = bond.GetOther(attach);
            // contract terminal bonds
            if (mol.GetConnectedBonds(nbr).Count() == 1)
            {
                if (nbr.MassNumber != null || (nbr.FormalCharge != null && nbr.FormalCharge != 0))
                {
                    newbonds.Add(bond);
                }
                else if (nbr.AtomicNumber == 1)
                {
                    hcount++;
                    xatoms.Add(nbr);
                }
                else if (nbr.AtomicNumber > 0)
                {
                    nbrSymbols.Add(NewSymbol(nbr.AtomicNumber, nbr.ImplicitHydrogenCount.Value, false));
                    xatoms.Add(nbr);
                }
            }
            else
            {
                newbonds.Add(bond);
            }
        }

        // reject if no symbols
        // reject if no bonds (<1), except if all symbols are identical... (HashSet.size==1)
        // reject if more that 2 bonds
        if (!nbrSymbols.Any()
            || (newbonds.Count < 1 && new HashSet<string>(nbrSymbols).Count != 1)
            || newbonds.Count > 2)
        {
            continue;
        }

        // create the symbol
        var sb = new StringBuilder();
        sb.Append(NewSymbol(attach.AtomicNumber, hcount, newbonds.Count == 0));

        // shortest first, then ordinal order so the label does not depend on the current culture
        nbrSymbols.Sort((o1, o2) =>
        {
            int cmp = o1.Length.CompareTo(o2.Length);
            return cmp != 0 ? cmp : string.CompareOrdinal(o1, o2);
        });

        // run-length encode equal neighbouring symbols
        string prev = null;
        int count = 0;
        foreach (string nbrSymbol in nbrSymbols)
        {
            if (nbrSymbol.Equals(prev))
            {
                count++;
            }
            else
            {
                bool useParen = count == 0
                    || CountUpper(prev) > 1
                    || (prev != null && nbrSymbol.StartsWith(prev, System.StringComparison.Ordinal));
                AppendGroup(sb, prev, count, useParen);
                prev = nbrSymbol;
                count = 1;
            }
        }
        AppendGroup(sb, prev, count, false);

        // remove existing
        foreach (var e in todelete)
        {
            newSgroups.Remove(e);
        }

        // create new
        var newSgroup = new Sgroup { Type = SgroupType.CtabAbbreviation, Subscript = sb.ToString() };
        foreach (var bond in newbonds)
        {
            newSgroup.Bonds.Add(bond);
        }
        foreach (var atom in xatoms)
        {
            newSgroup.Atoms.Add(atom);
        }
        newSgroups.Add(newSgroup);

        foreach (var a in xatoms)
        {
            usedAtoms.Add(a);
        }
    }

    return newSgroups;
}