Beispiel #1
0
        /// <summary>
        /// Copies the atom identifiers of the InChI-derived molecule graph onto the corresponding atoms
        /// of the input molecule, so that positions reported by InChI (for example those of mobile
        /// hydrogens) can be located in the input molecule.
        /// </summary>
        /// <remarks>
        /// Only the first mapping produced by the identical-structure search is used. The search is
        /// configured with an element atom matcher and an any-bond matcher.
        /// </remarks>
        /// <param name="inchiMolGraph">molecule (bare) as defined in InChI; source of the identifiers</param>
        /// <param name="mol">user input molecule; the <c>Id</c> of each mapped atom is overwritten</param>
        /// <exception cref="ArgumentException">
        /// No mapping was found; the message holds the CANSMI output for both structures.
        /// </exception>
        private static void MapInputMoleculeToInChIMolgraph(IAtomContainer inchiMolGraph, IAtomContainer mol)
        {
            var atomMapping = VentoFoggia
                .FindIdentical(inchiMolGraph, AtomMatcher.CreateElementMatcher(), BondMatcher.CreateAnyMatcher())
                .MatchAll(mol)
                .Limit(1)
                .ToAtomMap()
                .FirstOrDefault();

            // Without a mapping there is nothing to transfer: fail early.
            if (atomMapping == null)
            {
                throw new ArgumentException(CANSMI.Create(inchiMolGraph) + " " + CANSMI.Create(mol));
            }

            foreach (var pair in atomMapping)
            {
                var inchiAtom = pair.Key;
                var inputAtom = pair.Value;

                inputAtom.Id = inchiAtom.Id;
                Debug.WriteLine($"Mapped InChI {inchiAtom.Symbol} {inchiAtom.Id} to {inputAtom.Symbol} {inputAtom.Id}");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads every molecule from an SD file, generates a SMILES string for each one and writes the
        /// strings, one per line, to an output file. Progress and failures are reported on the console;
        /// no exception escapes from the read or the write phase.
        /// </summary>
        /// <param name="dataFileIn">path of the SD file to read</param>
        /// <param name="dataFileOut">path of the text file that receives the SMILES strings</param>
        public static void MakeCanonicalSmileFromRingSystems(string dataFileIn, string dataFileOut)
        {
            Console.Out.WriteLine("Start make SMILES...");
            var data   = new List<string>();
            var smiles = new SmilesGenerator();

            try
            {
                Console.Out.WriteLine("Start...");
                // Own the StreamReader explicitly so it is closed even when the SDF reader's
                // constructor throws before taking over the stream.
                using (var fin = new StreamReader(dataFileIn))
                using (var imdl = new EnumerableSDFReader(fin, builder))
                {
                    Console.Out.WriteLine("Read File in..");

                    foreach (var m in imdl)
                    {
                        try
                        {
                            data.Add(smiles.Create(builder.NewAtomContainer(m)));
                        }
                        catch (Exception exc1) when (exc1 is CDKException || exc1 is IOException)
                        {
                            // A single bad record must not abort the whole run; anything else
                            // propagates to the outer handler.
                            Console.Out.WriteLine("Could not create smile due to: " + exc1.Message);
                        }
                    }
                }
            }
            catch (Exception exc)
            {
                Console.Out.WriteLine("Could not read Molecules from file " + dataFileIn + " due to: " + exc.Message);
            }

            Console.Out.Write("...ready\nWrite data...");
            try
            {
                using (var fout = new StreamWriter(dataFileOut))
                {
                    foreach (var smi in data)
                    {
                        try
                        {
                            fout.WriteLine(smi);
                        }
                        catch (Exception lineExc)
                        {
                            // Writing stays best-effort per line, but a failure is now reported
                            // instead of being silently dropped.
                            Console.Out.WriteLine($"Could not write smile {smi} due to: {lineExc.Message}");
                        }
                    }
                    Console.Out.WriteLine($"number of smiles: {data.Count}");
                }
            }
            catch (Exception exc3)
            {
                Console.Out.WriteLine($"Could not write smile in file {dataFileOut} due to: {exc3.Message}");
            }
            Console.Out.WriteLine("...ready");
        }
Beispiel #3
0
        /// <summary>
        /// Splits the molecule parsed from <c>C1CC1C2CCC2</c> at its first bond that belongs to no ring
        /// and checks that exactly two distinct fragments, <c>[CH]1CC1</c> and <c>[CH]1CCC1</c>, result.
        /// </summary>
        public void TestSplit()
        {
            var      molecule = smilesParser.ParseSmiles("C1CC1C2CCC2");
            IRingSet allRings = new SpanningTree(molecule).GetAllRings();

            // Locate the first bond that is not a member of any ring.
            IBond acyclicBond = null;
            foreach (var candidate in molecule.Bonds)
            {
                if (allRings.GetRings(candidate).Count() == 0)
                {
                    acyclicBond = candidate;
                    break;
                }
            }

            var parts          = FragmentUtils.SplitMolecule(molecule, acyclicBond);
            var generator      = new SmilesGenerator();
            var distinctSmiles = new HashSet<string>();
            foreach (var part in parts)
            {
                distinctSmiles.Add(generator.Create(part));
            }

            Assert.AreEqual(2, distinctSmiles.Count);
            // You can put the fragments back together with a ring closure and dot
            // [CH]12CC1.[CH]12CCC1
            var expected = new[] { "[CH]1CC1", "[CH]1CCC1" };
            Assert.IsTrue(distinctSmiles.IsSupersetOf(expected));
        }
Beispiel #4
0
        /// <summary>
        /// Generates a SMILES string for the molecule and writes it, followed by a newline, to the
        /// underlying writer, flushing afterwards.
        /// </summary>
        /// <remarks>
        /// A <c>CDKException</c> or <c>IOException</c> raised while generating or writing is logged
        /// and suppressed; any other exception propagates to the caller.
        /// </remarks>
        /// <param name="molecule">Molecule of which the data is given as output.</param>
        public void WriteAtomContainer(IAtomContainer molecule)
        {
            // The aromatic flavour is only selected when the corresponding setting is set.
            var generator = useAromaticityFlag.IsSet
                ? new SmilesGenerator().Aromatic()
                : new SmilesGenerator();

            try
            {
                var line = generator.Create(molecule);
                Debug.WriteLine($"Generated SMILES: {line}");
                writer.Write(line);
                writer.Write('\n');
                writer.Flush();
                Debug.WriteLine("file flushed...");
            }
            catch (Exception exc) when (exc is CDKException || exc is IOException)
            {
                Trace.TraceError($"Error while writing Molecule: {exc.Message}");
                Debug.WriteLine(exc);
            }
        }
Beispiel #5
0
        //    private IAtomContainer CreateBenzaldehyde() {
        //        Molecule result = new DefaultMolecule();
        //        Atom c1 = result.Atoms.Add("C");
        //        Atom c2 = result.Atoms.Add("C");
        //        Atom c3 = result.Atoms.Add("C");
        //        Atom c4 = result.Atoms.Add("C");
        //        Atom c5 = result.Atoms.Add("C");
        //        Atom c6 = result.Atoms.Add("C");
        //        Atom c7 = result.Atoms.Add("C");
        //        Atom o8 = result.Atoms.Add("O");
        //
        //        result.Connect(c1, c2, 1);
        //        result.Connect(c2, c3, 2);
        //        result.Connect(c3, c4, 1);
        //        result.Connect(c4, c5, 2);
        //        result.Connect(c5, c6, 1);
        //        result.Connect(c6, c1, 2);
        //        result.Connect(c7, c1, 1);
        //        result.Connect(c7, o8, 2);
        //
        //        return result;
        //    }
        //
        //    private IAtomContainer CreateBenzoicAcid() {
        //        Molecule result = CreateBenzaldehyde();
        //
        //        result.Connect(result.Atoms[6], result.Atoms.Add("O"), 1);
        //
        //        return result;
        //    }
        //
        //    private IAtomContainer CreateBlockedBenzaldehyde() {
        //        Molecule result = CreateBenzaldehyde();
        //
        //        result.Connect(result.Atoms[6], result.Atoms.Add("H"), 1);
        //
        //        return result;
        //    }
        //    private Molecule Create4Toluene() {
        //        Molecule result = new DefaultMolecule();
        //        Atom c1 = result.Atoms.Add("C");
        //        Atom c2 = result.Atoms.Add("C");
        //        Atom c3 = result.Atoms.Add("C");
        //        Atom c4 = result.Atoms.Add("C");
        //        Atom c5 = result.Atoms.Add("C");
        //        Atom c6 = result.Atoms.Add("C");
        //        Atom c7 = result.Atoms.Add("C");
        //
        //        result.Connect(c1, c2, 1);
        //        result.Connect(c2, c3, 2);
        //        result.Connect(c3, c4, 1);
        //        result.Connect(c4, c5, 2);
        //        result.Connect(c5, c6, 1);
        //        result.Connect(c6, c1, 2);
        //        result.Connect(c7, c4, 1);
        //
        //        return result;
        //    }
        /// <summary>
        /// Builds a two-atom container with a carbon double-bonded to a nitrogen, then perceives atom
        /// types, adds implicit hydrogens and applies the legacy aromaticity model. The generated
        /// SMILES is printed to the console.
        /// </summary>
        /// <returns>the configured container</returns>
        public static IAtomContainer CreateSimpleImine()
        {
            IAtomContainer imine = builder.NewAtomContainer();

            var carbon   = builder.NewAtom("C");
            var nitrogen = builder.NewAtom("N");
            imine.Atoms.Add(carbon);
            imine.Atoms.Add(nitrogen);
            imine.Bonds.Add(builder.NewBond(carbon, nitrogen, BondOrder.Double));

            AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(imine);
            CDK.HydrogenAdder.AddImplicitHydrogens(imine);
            Aromaticity.CDKLegacy.Apply(imine);

            var generated = new SmilesGenerator().Create(imine);
            Console.Out.WriteLine("SimpleImine " + generated);

            return imine;
        }
        /// <summary>
        /// Parses a reaction SMILES carrying a CXSMILES radical annotation, flattens the reaction into
        /// a single molecule and converts it back, asserting the generated CXSMILES at both stages.
        /// </summary>
        public void InliningReactionsWithRadicals()
        {
            var parser    = new SmilesParser(ChemObjectBuilder.Instance);
            var generator = new SmilesGenerator(SmiFlavors.CxSmiles);

            var parsed = parser.ParseReactionSmiles("[CH2]CO.CC(=O)O>[H+]>CCOC(=O)C.O |^1:0| ethyl esterification");

            // reaction -> single molecule
            var flattened = ReactionManipulator.ToMolecule(parsed);
            Assert.AreEqual("[CH2]CO.CC(=O)O.[H+].CCOC(=O)C.O |^1:0|", generator.Create(flattened));

            // single molecule -> reaction again
            var restored = ReactionManipulator.ToReaction(flattened);
            Assert.AreEqual("[CH2]CO.CC(=O)O>[H+]>CCOC(=O)C.O |^1:0|", generator.CreateReactionSMILES(restored));
        }
Beispiel #7
0
        /// <summary>
        /// Generates an aromatic SMILES for indole, re-parses it and checks that a SMARTS query built
        /// from that same SMILES string matches the re-parsed molecule.
        /// </summary>
        public void TestIndoleAgainstItself()
        {
            var source = TestMoleculeFactory.MakeIndole();
            AddImplicitHydrogens(source);
            AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(source);
            Aromaticity.CDKLegacy.Apply(source);

            var aromaticSmiles = new SmilesGenerator().Aromatic().Create(source);

            // Round-trip through the parser; the SMILES doubles as the query pattern.
            var reparsed = CDK.SmilesParser.ParseSmiles(aromaticSmiles);
            var query    = new SMARTSQueryTool(aromaticSmiles, ChemObjectBuilder.Instance);

            Assert.IsTrue(query.Matches(reparsed));
        }
Beispiel #8
0
        /// <summary>
        /// Applies the aromaticity model to the container, generates its SMILES, passes it through
        /// <c>ReplaceDigits</c> and counts how often each substring of length <c>n</c> occurs.
        /// </summary>
        /// <param name="atomContainer">container to fingerprint; the aromaticity model is applied to it</param>
        /// <returns>map from substring to its number of occurrences; empty when the string is shorter than <c>n</c></returns>
        public override IReadOnlyDictionary<string, int> GetRawFingerprint(IAtomContainer atomContainer)
        {
            aromaticity.Apply(atomContainer);

            var text      = ReplaceDigits(gen.Create(atomContainer));
            var counts    = new Dictionary<string, int>();
            int lastStart = text.Length - n;

            for (int start = 0; start <= lastStart; start++)
            {
                var gram = text.Substring(start, n);

                // TryGetValue leaves 'seen' at 0 for an unseen substring, so the first hit stores 1.
                counts.TryGetValue(gram, out int seen);
                counts[gram] = seen + 1;
            }

            return counts;
        }
Beispiel #9
0
        /// <summary>
        /// Splits a reaction into one single-reactant/single-product reaction per reactant-product
        /// pair that is linked by the atom map.
        /// </summary>
        /// <remarks>
        /// A pair is kept when at least one reactant atom maps to an atom contained in the product.
        /// When <paramref name="map"/> is empty every reactant-product combination is kept. Each new
        /// reaction holds hydrogen-suppressed copies of the two molecules and has its
        /// <c>CDKPropertyName.SMILES</c> property set from the <c>sg</c> generator.
        /// </remarks>
        /// <param name="rxn">reaction to split</param>
        /// <param name="map">atom map from reactant atoms to product atoms</param>
        /// <returns>list of the new reactions, one per retained pair</returns>
        public static List<IReaction> SplitReaction(IReaction rxn, IDictionary<IAtom, IAtom> map)
        {
            var rctPrdPairs = from rct in rxn.Reactants
                              from prd in rxn.Products
                              select new { rct, prd };

            // TryGetValue performs a single lookup where ContainsKey + indexer performed two.
            var mappedPairs = rctPrdPairs.Where(pair =>
                map.Count == 0
                || pair.rct.Atoms.Any(rctAtom =>
                       map.TryGetValue(rctAtom, out var prdAtom) && pair.prd.Atoms.Contains(prdAtom)));

            return mappedPairs.Select(mp =>
            {
                var newRxn = ChemObjectBuilder.Instance.NewReaction();
                newRxn.Reactants.Add(AtomContainerManipulator.CopyAndSuppressedHydrogens(mp.rct));
                newRxn.Products.Add(AtomContainerManipulator.CopyAndSuppressedHydrogens(mp.prd));
                newRxn.SetProperty(CDKPropertyName.SMILES, sg.Create(newRxn));
                return newRxn;
            }).ToList();
        }
Beispiel #10
0
        /// <summary>
        /// Reads the molecules of an SD file, partitions each molecule's SSSR rings into ring systems
        /// and writes every ring system whose all-carbon SMILES has not been seen before to the file
        /// <c>dataFile + "_UniqueRings"</c>. Progress and a final summary go to the console.
        /// </summary>
        /// <remarks>
        /// If SMILES generation raises a <c>CDKException</c> the error is traced and the method
        /// returns immediately, without printing the summary.
        /// </remarks>
        /// <param name="dataFile">path of the SD file to read</param>
        public static void ExtractUniqueRingSystemsFromFile(string dataFile)
        {
            Console.Out.WriteLine("****** EXTRACT UNIQUE RING SYSTEMS ******");
            Console.Out.WriteLine($"From file: {dataFile}");

            var seenRingSystems = new Dictionary<string, string>();
            var generator       = new SmilesGenerator();
            int ringSystemCount = 0;
            int moleculeCount   = 0;
            int uniqueCount     = 0;
            var outputPath      = dataFile + "_UniqueRings";

            try
            {
                using (var fout = new FileStream(outputPath, FileMode.Create))
                using (var mdlw = new MDLV2000Writer(fout))
                {
                    try
                    {
                        Console.Out.WriteLine("Start...");
                        using (var fin = new StreamReader(dataFile))
                        using (var imdl = new EnumerableSDFReader(fin, builder))
                        {
                            Console.Out.WriteLine("Read File in..");

                            foreach (var m in imdl)
                            {
                                moleculeCount++;

                                IRingSet sssr = Cycles.FindSSSR(m).ToRingSet();

                                if (moleculeCount % 1000 == 0)
                                {
                                    Console.Out.WriteLine($"Molecules:{moleculeCount}");
                                }

                                // Molecules without rings contribute nothing.
                                if (sssr.Count <= 0)
                                {
                                    continue;
                                }

                                foreach (IRingSet ringSystem in RingPartitioner.PartitionRings(sssr))
                                {
                                    // Merge all rings of the system into one container.
                                    IAtomContainer merged = builder.NewAtomContainer();
                                    foreach (var ring in RingSetManipulator.GetAllAtomContainers(ringSystem))
                                    {
                                        merged.Add(ring);
                                    }
                                    ringSystemCount++;

                                    // Only connection is important
                                    for (int a = 0; a < merged.Atoms.Count; a++)
                                    {
                                        merged.Atoms[a].Symbol = "C";
                                    }

                                    string key;
                                    try
                                    {
                                        key = generator.Create(builder.NewAtomContainer(merged));
                                    }
                                    catch (CDKException e)
                                    {
                                        Trace.TraceError(e.Message);
                                        return;
                                    }

                                    if (seenRingSystems.ContainsKey(key))
                                    {
                                        continue;
                                    }

                                    uniqueCount++;
                                    seenRingSystems[key] = "1";
                                    try
                                    {
                                        mdlw.Write(builder.NewAtomContainer(merged));
                                    }
                                    catch (Exception emdl) when (emdl is ArgumentException || emdl is CDKException)
                                    {
                                        // A ring system that cannot be written is skipped silently.
                                    }
                                }
                            }
                        }
                    }
                    catch (Exception exc)
                    {
                        Console.Out.WriteLine($"Could not read Molecules from file {dataFile} due to: {exc.Message}");
                    }
                }
            }
            catch (Exception ex2)
            {
                Console.Out.WriteLine($"IOError:cannot write file due to: {ex2.ToString()}");
            }
            Console.Out.WriteLine($"READY Molecules:{moleculeCount} RingSystems:{ringSystemCount} UniqueRingsSystem:{uniqueCount}");
            Console.Out.WriteLine($"HashtableKeys:{seenRingSystems.Count}");
        }
Beispiel #11
0
 /// <summary>
 /// Generates a SMILES string for <paramref name="mol"/> using the <c>sg</c> generator
 /// declared elsewhere in this class.
 /// </summary>
 /// <param name="mol">molecule to convert</param>
 /// <returns>the generated SMILES</returns>
 static string SmiGen(IAtomContainer mol) => sg.Create(mol);
Beispiel #12
0
        /// <summary>
        /// Internal - generates a SMILES string while every atom temporarily carries a default
        /// implicit hydrogen count derived from its element and summed bond orders. The original
        /// counts are restored afterwards, even when generation fails. This method may be moved to
        /// the SMILESGenerator in future.
        /// </summary>
        /// <param name="mol">molecule; its implicit hydrogen counts are modified and then restored</param>
        /// <param name="ordering">ordering output, passed through to the generator</param>
        /// <returns>SMILES</returns>
        /// <exception cref="CDKException">SMILES could not be generated</exception>
        private string CreateCanonicalSmiles(IAtomContainer mol, int[] ordering)
        {
            int atomCount = mol.Atoms.Count;

            // Remember the current hydrogen counts and each atom's position.
            var savedHydrogens = new int?[atomCount];
            var indexOf        = new Dictionary<IAtom, int>();
            for (int i = 0; i < atomCount; i++)
            {
                var atom = mol.Atoms[i];
                savedHydrogens[i] = atom.ImplicitHydrogenCount;
                indexOf[atom]     = i;
            }

            // Sum of bond orders per atom.
            var valence = new int[atomCount];
            foreach (var bond in mol.Bonds)
            {
                int order = bond.Order.Numeric();
                valence[indexOf[bond.Begin]] += order;
                valence[indexOf[bond.End]]   += order;
            }

            // http://www.opensmiles.org/opensmiles.html#orgsbst
            for (int i = 0; i < atomCount; i++)
            {
                var atom      = mol.Atoms[i];
                int v         = valence[i];
                int hydrogens = 0; // stays 0 for other elements and for valences above the listed ones

                switch (atom.AtomicNumber)
                {
                case 5:      // B
                    hydrogens = v <= 3 ? 3 - v : 0;
                    break;

                case 6:      // C
                    hydrogens = v <= 4 ? 4 - v : 0;
                    break;

                case 7:      // N
                case 15:     // P
                    hydrogens = v <= 3 ? 3 - v
                              : v <= 5 ? 5 - v
                              : 0;
                    break;

                case 8:      // O
                    hydrogens = v <= 2 ? 2 - v : 0;
                    break;

                case 16:     // S
                    hydrogens = v <= 2 ? 2 - v
                              : v <= 4 ? 4 - v
                              : v <= 6 ? 6 - v
                              : 0;
                    break;

                case 9:      // F
                case 17:     // Cl
                case 35:     // Br
                case 53:     // I
                    hydrogens = v <= 1 ? 1 - v : 0;
                    break;
                }

                atom.ImplicitHydrogenCount = hydrogens;
            }

            try
            {
                return smigen.Create(mol, ordering);
            }
            finally
            {
                // restore
                for (int i = 0; i < atomCount; i++)
                {
                    mol.Atoms[i].ImplicitHydrogenCount = savedHydrogens[i];
                }
            }
        }
Beispiel #13
0
        /// <summary>
        /// Splits the container at each of its splittable bonds, keeps the fragments that are large
        /// enough and not yet known to <c>fragMap</c>, and then repeats the procedure recursively on
        /// the fragments just kept.
        /// </summary>
        /// <param name="atomContainer">container to fragment</param>
        /// <returns>
        /// the newly registered fragments; empty when the container has fewer than three bonds or no
        /// splittable bond
        /// </returns>
        private List<IAtomContainer> Run(IAtomContainer atomContainer)
        {
            var direct = new List<IAtomContainer>();

            if (atomContainer.Bonds.Count < 3)
            {
                return direct;
            }

            var splitableBonds = GetSplitableBonds(atomContainer);
            if (splitableBonds.Count == 0)
            {
                return direct;
            }
            Debug.WriteLine($"Got {splitableBonds.Count} splittable bonds");

            foreach (var bond in splitableBonds)
            {
                foreach (var part in FragmentUtils.SplitMolecule(atomContainer, bond))
                {
                    RegisterIfNewFragment(part, direct);
                }
            }

            // try and partition the fragments
            var all = new List<IAtomContainer>(direct);
            foreach (var fragment in direct)
            {
                bool tooSmall = fragment.Bonds.Count < 3 || fragment.Atoms.Count < MinimumFragmentSize;
                if (tooSmall || GetSplitableBonds(fragment).Count == 0)
                {
                    continue;
                }

                foreach (var nested in Run(fragment))
                {
                    if (nested.Bonds.Count >= 3)
                    {
                        RegisterIfNewFragment(nested, all);
                    }
                }
            }

            return all;
        }

        /// <summary>
        /// Re-perceives atom types, implicit hydrogens and aromaticity of a fragment, generates its
        /// SMILES and, when the fragment is large enough and its SMILES is not yet a key of
        /// <c>fragMap</c>, records it in both <c>fragMap</c> and <paramref name="sink"/>.
        /// </summary>
        private void RegisterIfNewFragment(IAtomContainer candidate, List<IAtomContainer> sink)
        {
            AtomContainerManipulator.ClearAtomConfigurations(candidate);
            foreach (var atom in candidate.Atoms)
            {
                atom.ImplicitHydrogenCount = null;
            }
            AtomContainerManipulator.PercieveAtomTypesAndConfigureAtoms(candidate);
            CDK.HydrogenAdder.AddImplicitHydrogens(candidate);
            Aromaticity.CDKLegacy.Apply(candidate);

            var smiles = smilesGenerator.Create(candidate);
            if (candidate.Atoms.Count >= MinimumFragmentSize && !fragMap.ContainsKey(smiles))
            {
                sink.Add(candidate);
                fragMap[smiles] = candidate;
            }
        }
Beispiel #14
0
        /// <summary>
        /// Generates a SMILES string for the molecule with a freshly constructed, default
        /// <c>SmilesGenerator</c>.
        /// </summary>
        /// <param name="mol">molecule to convert</param>
        /// <returns>the generated SMILES</returns>
        public static string MolToSmiles(IAtomContainer mol) => new SmilesGenerator().Create(mol);
Beispiel #15
0
        /// <summary>
        /// Find all enabled abbreviations in the provided molecule. They are not
        /// added to the existing Sgroups and may need filtering.
        /// </summary>
        /// <param name="mol">molecule</param>
        /// <returns>list of new abbreviation Sgroups</returns>
        public IList <Sgroup> Generate(IAtomContainer mol)
        {
            // mark which atoms have already been abbreviated or are
            // part of an existing Sgroup
            var usedAtoms = new HashSet <IAtom>();
            var sgroups   = mol.GetCtabSgroups();

            if (sgroups != null)
            {
                foreach (var sgroup in sgroups)
                {
                    foreach (var atom in sgroup.Atoms)
                    {
                        usedAtoms.Add(atom);
                    }
                }
            }

            var newSgroups = new List <Sgroup>();

            // disconnected abbreviations, salts, common reagents, large compounds
            if (!usedAtoms.Any())
            {
                try
                {
                    var    copy   = AtomContainerManipulator.CopyAndSuppressedHydrogens(mol);
                    string cansmi = usmigen.Create(copy);
                    if (disconnectedAbbreviations.TryGetValue(cansmi, out string label) && !disabled.Contains(label) && ContractToSingleLabel)
                    {
                        var sgroup = new Sgroup
                        {
                            Type      = SgroupType.CtabAbbreviation,
                            Subscript = label
                        };
                        foreach (var atom in mol.Atoms)
                        {
                            sgroup.Atoms.Add(atom);
                        }
                        return(new[] { sgroup });
                    }
                    else if (cansmi.Contains("."))
                    {
                        var parts = ConnectivityChecker.PartitionIntoMolecules(mol);

                        // leave one out
                        Sgroup best = null;
                        for (int i = 0; i < parts.Count; i++)
                        {
                            var a = parts[i];
                            var b = a.Builder.NewAtomContainer();
                            for (int j = 0; j < parts.Count; j++)
                            {
                                if (j != i)
                                {
                                    b.Add(parts[j]);
                                }
                            }
                            var sgroup1 = GetAbbr(a);
                            var sgroup2 = GetAbbr(b);
                            if (sgroup1 != null && sgroup2 != null && ContractToSingleLabel)
                            {
                                var combined = new Sgroup();
                                label = null;
                                foreach (var atom in sgroup1.Atoms)
                                {
                                    combined.Atoms.Add(atom);
                                }
                                foreach (var atom in sgroup2.Atoms)
                                {
                                    combined.Atoms.Add(atom);
                                }
                                if (sgroup1.Subscript.Length > sgroup2.Subscript.Length)
                                {
                                    combined.Subscript = sgroup1.Subscript + String_Interpunct + sgroup2.Subscript;
                                }
                                else
                                {
                                    combined.Subscript = sgroup2.Subscript + String_Interpunct + sgroup1.Subscript;
                                }
                                combined.Type = SgroupType.CtabAbbreviation;
                                return(new[] { combined });
                            }
                            if (sgroup1 != null && (best == null || sgroup1.Atoms.Count > best.Atoms.Count))
                            {
                                best = sgroup1;
                            }
                            if (sgroup2 != null && (best == null || sgroup2.Atoms.Count < best.Atoms.Count))
                            {
                                best = sgroup2;
                            }
                        }
                        if (best != null)
                        {
                            newSgroups.Add(best);
                            foreach (var atom in best.Atoms)
                            {
                                usedAtoms.Add(atom);
                            }
                        }
                    }
                }
                catch (CDKException)
                {
                }
            }

            var fragments  = GenerateFragments(mol);
            var sgroupAdjs = new MultiDictionary <IAtom, Sgroup>();

            foreach (var frag in fragments)
            {
                try
                {
                    var smi = usmigen.Create(AtomContainerManipulator.CopyAndSuppressedHydrogens(frag));
                    if (!connectedAbbreviations.TryGetValue(smi, out string label) || disabled.Contains(label))
                    {
                        continue;
                    }

                    bool overlap = false;

                    // note: first atom is '*'
                    var numAtoms = frag.Atoms.Count;
                    var numBonds = frag.Bonds.Count;
                    for (int i = 1; i < numAtoms; i++)
                    {
                        if (usedAtoms.Contains(frag.Atoms[i]))
                        {
                            overlap = true;
                            break;
                        }
                    }

                    // overlaps with previous assignment
                    if (overlap)
                    {
                        continue;
                    }

                    // create new abbreviation Sgroup
                    var sgroup = new Sgroup
                    {
                        Type      = SgroupType.CtabAbbreviation,
                        Subscript = label
                    };

                    var   attachBond = frag.Bonds[0].GetProperty <IBond>(PropertyName_CutBond);
                    IAtom attachAtom = null;
                    sgroup.Bonds.Add(attachBond);
                    for (int i = 1; i < numAtoms; i++)
                    {
                        var atom = frag.Atoms[i];
                        usedAtoms.Add(atom);
                        sgroup.Atoms.Add(atom);
                        if (attachBond.Begin.Equals(atom))
                        {
                            attachAtom = attachBond.End;
                        }
                        else if (attachBond.End.Equals(atom))
                        {
                            attachAtom = attachBond.Begin;
                        }
                    }

                    if (attachAtom != null)
                    {
                        sgroupAdjs.Add(attachAtom, sgroup);
                    }
                    newSgroups.Add(sgroup);
                }
                catch (CDKException)
                {
                    // ignore
                }
            }

            if (!ContractOnHetero)
            {
                return(newSgroups);
            }

            // now collapse
            foreach (var attach in mol.Atoms)
            {
                if (usedAtoms.Contains(attach))
                {
                    continue;
                }

                // skip charged or isotopically labelled atoms, carbon, and anything with atomic number < 2 (R/*, H)
                if ((attach.FormalCharge != null && attach.FormalCharge != 0) ||
                    attach.MassNumber != null ||
                    attach.AtomicNumber == 6 ||
                    attach.AtomicNumber < 2)
                {
                    continue;
                }

                var hcount   = attach.ImplicitHydrogenCount.Value;
                var xatoms   = new HashSet <IAtom>();
                var xbonds   = new HashSet <IBond>();
                var newbonds = new HashSet <IBond>();
                xatoms.Add(attach);

                var nbrSymbols = new List <string>();
                var todelete   = new HashSet <Sgroup>();
                foreach (var sgroup in sgroupAdjs[attach])
                {
                    if (ContainsChargeChar(sgroup.Subscript))
                    {
                        continue;
                    }
                    if (sgroup.Bonds.Count != 1)
                    {
                        continue;
                    }
                    var xbond = sgroup.Bonds.First();
                    xbonds.Add(xbond);
                    foreach (var a in sgroup.Atoms)
                    {
                        xatoms.Add(a);
                    }
                    if (attach.Symbol.Length == 1 &&
                        char.IsLower(sgroup.Subscript[0]))
                    {
                        if (ChemicalElement.OfSymbol(attach.Symbol + sgroup.Subscript[0]) != ChemicalElement.R)
                        {
                            goto continue_collapse;
                        }
                    }
                    nbrSymbols.Add(sgroup.Subscript);
                    todelete.Add(sgroup);
                }
                int numSGrpNbrs = nbrSymbols.Count;
                foreach (var bond in mol.GetConnectedBonds(attach))
                {
                    if (!xbonds.Contains(bond))
                    {
                        var nbr = bond.GetOther(attach);
                        // contract terminal bonds
                        if (mol.GetConnectedBonds(nbr).Count() == 1)
                        {
                            if (nbr.MassNumber != null ||
                                (nbr.FormalCharge != null && nbr.FormalCharge != 0))
                            {
                                newbonds.Add(bond);
                            }
                            else if (nbr.AtomicNumber == 1)
                            {
                                hcount++;
                                xatoms.Add(nbr);
                            }
                            else if (nbr.AtomicNumber > 0)
                            {
                                nbrSymbols.Add(NewSymbol(nbr.AtomicNumber, nbr.ImplicitHydrogenCount.Value, false));
                                xatoms.Add(nbr);
                            }
                        }
                        else
                        {
                            newbonds.Add(bond);
                        }
                    }
                }

                // reject if no symbols
                // reject if no bonds (<1), except if all symbols are identical... (HashSet.size==1)
                // reject if more than 2 bonds
                if (!nbrSymbols.Any() ||
                    newbonds.Count < 1 && (new HashSet <string>(nbrSymbols).Count != 1) ||
                    newbonds.Count > 2)
                {
                    continue;
                }

                // create the symbol
                var sb = new StringBuilder();
                sb.Append(NewSymbol(attach.AtomicNumber, hcount, newbonds.Count == 0));
                string prev  = null;
                int    count = 0;
                nbrSymbols.Sort((o1, o2) =>
                {
                    int cmp = o1.Length.CompareTo(o2.Length);
                    if (cmp != 0)
                    {
                        return(cmp);
                    }
                    return(o1.CompareTo(o2));
                });
                foreach (string nbrSymbol in nbrSymbols)
                {
                    if (nbrSymbol.Equals(prev))
                    {
                        count++;
                    }
                    else
                    {
                        bool useParen = count == 0 || CountUpper(prev) > 1 || (prev != null && nbrSymbol.StartsWith(prev));
                        AppendGroup(sb, prev, count, useParen);
                        prev  = nbrSymbol;
                        count = 1;
                    }
                }
                AppendGroup(sb, prev, count, false);

                // remove existing
                foreach (var e in todelete)
                {
                    newSgroups.Remove(e);
                }

                // create new
                var newSgroup = new Sgroup
                {
                    Type      = SgroupType.CtabAbbreviation,
                    Subscript = sb.ToString()
                };
                foreach (var bond in newbonds)
                {
                    newSgroup.Bonds.Add(bond);
                }
                foreach (var atom in xatoms)
                {
                    newSgroup.Atoms.Add(atom);
                }

                newSgroups.Add(newSgroup);
                foreach (var a in xatoms)
                {
                    usedAtoms.Add(a);
                }
continue_collapse:
                ;
            }

            return(newSgroups);
        }