コード例 #1
0
        /// <summary>
        /// Reads the RGroup query from the resource <paramref name="file"/> and writes it back
        /// out through an <see cref="RGroupQueryWriter"/>.
        /// </summary>
        /// <param name="file">resource name handed to <c>ResourceLoader.GetAsStream</c></param>
        /// <returns>the text the writer produced for the query that was read</returns>
        private string Recreate(string file)
        {
            RGroupQuery rGroupQuery;
            var reader = new RGroupQueryReader(ResourceLoader.GetAsStream(file));
            try
            {
                rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery());
            }
            finally
            {
                // Close the reader even when parsing throws, so the stream is not left open.
                reader.Close();
            }

            var sw  = new StringWriter();
            var rgw = new RGroupQueryWriter(sw);
            rgw.Write(rGroupQuery);

            return sw.ToString();
        }
コード例 #2
0
        // Reads rgfile.3.mol and verifies the R-group definition count, the root structure size,
        // the number of configurations and the AAL-defined attachment order of R1.
        public void TestRgroupQueryFile3()
        {
            var filename = "NCDK.Data.MDL.rgfile.3.mol";

            Trace.TraceInformation("Testing: " + filename);
            var ins         = ResourceLoader.GetAsStream(filename);
            var reader      = new RGroupQueryReader(ins);
            var rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));

            reader.Close();
            Assert.IsNotNull(rGroupQuery);
            // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages correct.
            Assert.AreEqual(1, rGroupQuery.RGroupDefinitions.Count);
            Assert.AreEqual(10, rGroupQuery.RootStructure.Atoms.Count);
            Assert.AreEqual(2, rGroupQuery.RootAttachmentPoints.Count);

            Assert.AreEqual(8, rGroupQuery.GetAllConfigurations().Count());

            // Test correctness of the AAL lines
            foreach (var at in rGroupQuery.GetRgroupQueryAtoms(1))
            {
                if (at is PseudoAtom pseudo)
                {
                    Assert.AreEqual("R1", pseudo.Label);

                    // Attachment bonds are keyed by their attachment order (1, 2, ...).
                    var apoBonds = rGroupQuery.RootAttachmentPoints[at];
                    Assert.AreEqual(2, apoBonds.Count);

                    var boundAtom1 = apoBonds[1].GetOther(at);
                    Assert.IsTrue(boundAtom1.Symbol.Equals("Te") || boundAtom1.Symbol.Equals("S"));

                    var boundAtom2 = apoBonds[2].GetOther(at);
                    Assert.IsTrue(boundAtom2.Symbol.Equals("Po") || boundAtom2.Symbol.Equals("O"));
                }
            }

            // Test that there are only two Rgroup query atoms (R#). The third R is a
            // pseudo atom, but because it is not numbered it is not part of any
            // query condition.
            var allrGroupQueryAtoms = rGroupQuery.GetAllRgroupQueryAtoms();

            Assert.AreEqual(2, allrGroupQueryAtoms.Count);
        }
コード例 #3
0
ファイル: RGroupQueryReader.cs プロジェクト: roddickchen/NCDK
        /// <summary>
        /// Parses the RGFile available through <c>input</c> and populates
        /// <paramref name="rGroupQuery"/> with the root structure, the root attachment
        /// points and the R-group definitions. Uses <see cref="MDLV2000Reader"/>
        /// to parse the individual $CTAB blocks.
        /// </summary>
        /// <param name="rGroupQuery">empty query to populate; its builder creates the atom containers</param>
        /// <returns>the same <paramref name="rGroupQuery"/> instance, populated</returns>
        /// <exception cref="CDKException">
        /// The file ends prematurely, an $RGP block references an R-group that is not present
        /// in the root structure, or an <see cref="IOException"/> / <see cref="ArgumentException"/>
        /// occurred while parsing (wrapped, with the line number in the message).
        /// </exception>
        private RGroupQuery ParseRGFile(RGroupQuery rGroupQuery)
        {
            var    defaultChemObjectBuilder = rGroupQuery.Builder;
            int    lineCount = 0;
            string line      = "";
            // Captures the "M  LOG" line(s) of the root structure, keyed by R-group number.
            var logicDefinitions = new Dictionary<int, RGroupLogic>();

            // Captures the attachment order for Rgroups. Contains: - pseudo
            // atom (Rgroup) - map with (integer,bond) meaning "bond" has attachment
            // order "integer" (1,2,3) for the Rgroup. The order is based on the atom
            // block, unless there is an AAL line for the pseudo atom.
            var attachmentPoints = new Dictionary<IAtom, IReadOnlyDictionary<int, IBond>>();

            try
            {
                // Process the Header block_________________________________________
                //__________________________________________________________________
                Trace.TraceInformation("Process the Header block");
                CheckLineBeginsWith(input.ReadLine(), "$MDL", ++lineCount);
                CheckLineBeginsWith(input.ReadLine(), "$MOL", ++lineCount);
                CheckLineBeginsWith(input.ReadLine(), "$HDR", ++lineCount);

                // Three header lines must be present; their content is not interpreted.
                for (int headerLine = 1; headerLine <= 3; headerLine++)
                {
                    lineCount++;
                    if (input.ReadLine() == null)
                    {
                        throw new CDKException("RGFile invalid, empty/null header line at #" + lineCount);
                    }
                }
                CheckLineBeginsWith(input.ReadLine(), "$END HDR", ++lineCount);

                string rootStr;
                {
                    //Process the root structure (scaffold)_____________________________
                    //__________________________________________________________________
                    Trace.TraceInformation("Process the root structure (scaffold)");
                    CheckLineBeginsWith(input.ReadLine(), "$CTAB", ++lineCount);
                    // Force a header so the collected text can be handed to the MDL reader.
                    var sb = new StringBuilder(RGroup.RootLabelKey + "\n\n\n");
                    line = input.ReadLine();
                    ++lineCount;
                    while (line != null && !string.Equals(line, "$END CTAB", StringComparison.Ordinal))
                    {
                        sb.Append(line).Append('\n');

                        //LOG lines: Logic, Unsatisfied Sites, Range of Occurrence.
                        if (line.StartsWith("M  LOG", StringComparison.Ordinal))
                        {
                            var tokens       = Strings.Tokenize(line);
                            var rgroupNumber = int.Parse(tokens[3], NumberFormatInfo.InvariantInfo);
                            var log          = new RGroupLogic();
                            log.rgoupNumberRequired = int.Parse(tokens[4], NumberFormatInfo.InvariantInfo);
                            log.restH = string.Equals(tokens[5], "1", StringComparison.Ordinal);

                            // Everything after the sixth token is the occurrence expression,
                            // joined without separators.
                            var occurrence = new StringBuilder();
                            for (int i = 6; i < tokens.Count; i++)
                            {
                                occurrence.Append(tokens[i]);
                            }
                            log.occurence = occurrence.ToString();
                            logicDefinitions[rgroupNumber] = log;
                        }

                        line = input.ReadLine();
                        ++lineCount;
                    }
                    rootStr = sb.ToString();
                }

                //Let MDL reader process $CTAB block of the root structure.
                var reader = new MDLV2000Reader(new StringReader(rootStr), ChemObjectReaderMode.Strict);
                var root   = reader.Read(defaultChemObjectBuilder.NewAtomContainer());
                rGroupQuery.RootStructure = root;

                //Atom attachment order: parse AAL lines first
                using (var rootLinesReader = new StringReader(rootStr))
                {
                    while ((line = rootLinesReader.ReadLine()) != null)
                    {
                        if (!line.StartsWith("M  AAL", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        var stAAL   = Strings.Tokenize(line);
                        var pos     = int.Parse(stAAL[2], NumberFormatInfo.InvariantInfo);
                        var rGroup  = root.Atoms[pos - 1];
                        var bondMap = new Dictionary<int, IBond>();
                        // Tokens from index 4 on come in (partner atom number, attachment order) pairs.
                        for (int i = 4; i < stAAL.Count; i += 2)
                        {
                            pos = int.Parse(stAAL[i], NumberFormatInfo.InvariantInfo);
                            var partner = root.Atoms[pos - 1];
                            var bond    = root.GetBond(rGroup, partner);
                            var order   = int.Parse(stAAL[i + 1], NumberFormatInfo.InvariantInfo);
                            bondMap[order] = bond;
                            Trace.TraceInformation($"AAL {order} {((IPseudoAtom)rGroup).Label}-{partner.Symbol}");
                        }
                        if (bondMap.Count != 0)
                        {
                            attachmentPoints[rGroup] = bondMap;
                        }
                    }
                }

                //Deal with remaining attachment points (non AAL)
                foreach (var atom in root.Atoms)
                {
                    if (atom is IPseudoAtom rGroup)
                    {
                        if (rGroup.Label.StartsWithChar('R') &&
                            !rGroup.Label.Equals("R") && // only numbered ones
                            !attachmentPoints.ContainsKey(rGroup))
                        {
                            //Order reflects the order of atoms in the Atom Block
                            int order   = 0;
                            var bondMap = new Dictionary<int, IBond>();
                            foreach (var atom2 in root.Atoms)
                            {
                                if (atom.Equals(atom2))
                                {
                                    continue;
                                }
                                foreach (var bond in root.Bonds)
                                {
                                    if (bond.Contains(atom) && bond.Contains(atom2))
                                    {
                                        bondMap[++order] = bond;
                                        Trace.TraceInformation($"Def {order} {rGroup.Label}-{atom2.Symbol}");
                                        break;
                                    }
                                }
                            }
                            if (bondMap.Count != 0)
                            {
                                attachmentPoints[rGroup] = bondMap;
                            }
                        }
                    }
                }
                //Done with attachment points
                rGroupQuery.RootAttachmentPoints = attachmentPoints;
                Trace.TraceInformation($"Attachm.points defined for {attachmentPoints.Count} R# atoms");

                //Process each Rgroup's $CTAB block(s)_____________________________
                //__________________________________________________________________

                //Set up the RgroupLists, one for each unique R# (# = 1..32 max)
                var rGroupDefinitions = new Dictionary<int, RGroupList>();

                foreach (var atom in root.Atoms)
                {
                    if (atom is IPseudoAtom rGroup && RGroupQuery.IsValidRgroupQueryLabel(rGroup.Label))
                    {
                        var rgroupNum = int.Parse(rGroup.Label.Substring(1), NumberFormatInfo.InvariantInfo);
                        if (rGroupDefinitions.ContainsKey(rgroupNum))
                        {
                            continue;
                        }

                        var rgroupList = new RGroupList(rgroupNum);
                        Trace.TraceInformation($"Define Rgroup R{rgroupNum}");
                        // TryGetValue instead of the indexer: an R-group without a LOG line is
                        // legal and must fall back to the defaults rather than throw
                        // KeyNotFoundException.
                        if (logicDefinitions.TryGetValue(rgroupNum, out RGroupLogic logic))
                        {
                            rgroupList.IsRestH              = logic.restH;
                            rgroupList.Occurrence           = logic.occurence;
                            rgroupList.RequiredRGroupNumber = logic.rgoupNumberRequired;
                        }
                        else
                        {
                            rgroupList.IsRestH              = false;
                            rgroupList.Occurrence           = ">0";
                            rgroupList.RequiredRGroupNumber = 0;
                        }
                        rgroupList.RGroups           = new List<RGroup>();
                        rGroupDefinitions[rgroupNum] = rgroupList;
                    }
                }

                //Parse all $CTAB blocks per Rgroup (there can be more than one)
                line = input.ReadLine();
                ++lineCount;
                bool hasMoreRGP = true;
                while (hasMoreRGP)
                {
                    CheckLineBeginsWith(line, "$RGP", lineCount);
                    line = input.ReadLine();
                    ++lineCount;
                    if (line == null)
                    {
                        throw new CDKException("RGFile invalid, unexpected end of file at line #" + lineCount);
                    }
                    Trace.TraceInformation("line for num is " + line);
                    int rgroupNum = int.Parse(line.Trim(), NumberFormatInfo.InvariantInfo);
                    line = input.ReadLine();
                    ++lineCount;

                    bool hasMoreCTAB = true;
                    while (hasMoreCTAB)
                    {
                        CheckLineBeginsWith(line, "$CTAB", lineCount);
                        var sb = new StringBuilder(RGroup.MakeLabel(rgroupNum) + "\n\n\n");
                        line = input.ReadLine();
                        ++lineCount; // count this read too, so reported line numbers stay accurate
                        while (line != null && !line.StartsWith("$END CTAB", StringComparison.Ordinal))
                        {
                            sb.Append(line).Append('\n');
                            line = input.ReadLine();
                            ++lineCount;
                        }
                        if (line == null)
                        {
                            throw new CDKException("RGFile invalid, unexpected end of file at line #" + lineCount);
                        }

                        var groupStr = sb.ToString();
                        reader = new MDLV2000Reader(new StringReader(groupStr), ChemObjectReaderMode.Strict);
                        var groupContainer = reader.Read(defaultChemObjectBuilder.NewAtomContainer());
                        var rGroup = new RGroup
                        {
                            Group = groupContainer
                        };

                        //Parse the Rgroup's attachment points (APO)
                        using (var groupLinesReader = new StringReader(groupStr))
                        {
                            while ((line = groupLinesReader.ReadLine()) != null)
                            {
                                if (!line.StartsWith("M  APO", StringComparison.Ordinal))
                                {
                                    continue;
                                }

                                var stAPO = Strings.Tokenize(line);
                                // Tokens from index 3 on come in (atom number, attachment point code) pairs.
                                for (int i = 3; i < stAPO.Count; i += 2)
                                {
                                    var pos = int.Parse(stAPO[i], NumberFormatInfo.InvariantInfo);
                                    var apo = int.Parse(stAPO[i + 1], NumberFormatInfo.InvariantInfo);
                                    var at  = groupContainer.Atoms[pos - 1];
                                    switch (apo)
                                    {
                                        case 1:
                                            rGroup.FirstAttachmentPoint = at;
                                            break;

                                        case 2:
                                            rGroup.SecondAttachmentPoint = at;
                                            break;

                                        case 3:
                                            // Code 3: the atom serves as both attachment points.
                                            rGroup.FirstAttachmentPoint  = at;
                                            rGroup.SecondAttachmentPoint = at;
                                            break;
                                    }
                                }
                            }
                        }

                        // TryGetValue instead of the indexer so that an undefined R-group is
                        // reported with the intended CDKException.
                        if (!rGroupDefinitions.TryGetValue(rgroupNum, out RGroupList rList) || rList == null)
                        {
                            throw new CDKException("R" + rgroupNum + " not defined but referenced in $RGP.");
                        }
                        rList.RGroups.Add(rGroup);

                        line = input.ReadLine();
                        ++lineCount;
                        if (line == null)
                        {
                            throw new CDKException("RGFile invalid, unexpected end of file at line #" + lineCount);
                        }
                        if (line.StartsWith("$END RGP", StringComparison.Ordinal))
                        {
                            Trace.TraceInformation("end of RGP block");
                            hasMoreCTAB = false;
                        }
                    }

                    line = input.ReadLine();
                    ++lineCount;
                    if (line == null)
                    {
                        throw new CDKException("RGFile invalid, unexpected end of file at line #" + lineCount);
                    }
                    if (line.StartsWith("$END MOL", StringComparison.Ordinal))
                    {
                        hasMoreRGP = false;
                    }
                }

                rGroupQuery.RGroupDefinitions = rGroupDefinitions;
                Trace.TraceInformation("Number of lines was " + lineCount);
                return rGroupQuery;
            }
            catch (CDKException exception)
            {
                string error = $"CDK Error while parsing line {lineCount}: {line} -> {exception.Message}";
                Trace.TraceError(error);
                Debug.WriteLine(exception);
                throw;
            }
            catch (Exception exception) when (exception is IOException || exception is ArgumentException)
            {
                // Only I/O and argument errors are wrapped; anything else propagates unchanged.
                Console.Error.WriteLine(exception.StackTrace);
                var error = exception.GetType() + "Error while parsing line " + lineCount + ": " + line + " -> " + exception.Message;
                Trace.TraceError(error);
                Debug.WriteLine(exception);
                throw new CDKException(error, exception);
            }
        }
コード例 #4
0
        // Reads rgfile.1.mol and verifies the single R1 definition, its root attachment bond,
        // the attachment points of its three R-groups and the RestH flag on the phosphorus
        // atom of the six-atom configuration.
        public void TestRgroupQueryFile1()
        {
            var filename = "NCDK.Data.MDL.rgfile.1.mol";

            Trace.TraceInformation("Testing: " + filename);
            var ins         = ResourceLoader.GetAsStream(filename);
            var reader      = new RGroupQueryReader(ins);
            var rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));

            reader.Close();
            Assert.IsNotNull(rGroupQuery);
            // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages correct.
            Assert.AreEqual(1, rGroupQuery.RGroupDefinitions.Count);
            Assert.AreEqual(7, rGroupQuery.RootStructure.Atoms.Count);

            foreach (var at in rGroupQuery.GetAllRgroupQueryAtoms())
            {
                if (at is PseudoAtom pseudo)
                {
                    Assert.AreEqual("R1", pseudo.Label);
                    var rootApo  = rGroupQuery.RootAttachmentPoints;
                    var apoBonds = rootApo[at];
                    Assert.AreEqual(1, apoBonds.Count);
                    // Assert that the root attachment is the bond between R1 and P
                    foreach (var bond in rGroupQuery.RootStructure.Bonds)
                    {
                        if (bond.Contains(at))
                        {
                            Assert.AreEqual(bond, apoBonds[1]);
                            foreach (var atInApo in bond.Atoms)
                            {
                                Assert.IsTrue(atInApo.Symbol.Equals("R") || atInApo.Symbol.Equals("P"));
                            }
                        }
                    }
                }
            }

            int firstKey = rGroupQuery.RGroupDefinitions.Keys.First();

            Assert.AreEqual(1, firstKey);
            RGroupList rList = rGroupQuery.RGroupDefinitions[firstKey];

            Assert.AreEqual("0,1-3", rList.Occurrence);

            var rGroups = rList.RGroups;

            Assert.AreEqual("N", rGroups[0].FirstAttachmentPoint.Symbol);
            Assert.AreEqual("O", rGroups[1].FirstAttachmentPoint.Symbol);
            Assert.AreEqual("S", rGroups[2].FirstAttachmentPoint.Symbol);

            Assert.IsNull(rGroups[0].SecondAttachmentPoint);
            Assert.IsNull(rGroups[1].SecondAttachmentPoint);
            Assert.IsNull(rGroups[2].SecondAttachmentPoint);

            // Materialize once: the sequence is counted and then iterated.
            var configurations = rGroupQuery.GetAllConfigurations().ToList();

            Assert.AreEqual(4, configurations.Count);

            //IsRestH is set to true for R1, so with zero substitutes, the phosphor should get the restH flag set to true.
            bool restH_Identified = false;

            foreach (var atc in configurations)
            {
                if (atc.Atoms.Count != 6)
                {
                    continue;
                }
                foreach (var atom in atc.Atoms)
                {
                    if (atom.Symbol.Equals("P"))
                    {
                        Assert.IsNotNull(atom.GetProperty<bool?>(CDKPropertyName.RestH));
                        Assert.IsTrue(atom.GetProperty<bool>(CDKPropertyName.RestH));
                        restH_Identified = true;
                    }
                }
            }
            Assert.IsTrue(restH_Identified);
        }
コード例 #5
0
        // Reads rgfile.2.mol and verifies the three R-group definitions (R1, R2, R11), their
        // attachment points and logic settings, the number of configurations and the RestH
        // flags found in the 13-atom configurations.
        public void TestRgroupQueryFile2()
        {
            var filename = "NCDK.Data.MDL.rgfile.2.mol";

            Trace.TraceInformation("Testing: " + filename);
            var ins         = ResourceLoader.GetAsStream(filename);
            var reader      = new RGroupQueryReader(ins);
            var rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));

            reader.Close();
            Assert.IsNotNull(rGroupQuery);
            // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages correct.
            Assert.AreEqual(3, rGroupQuery.RGroupDefinitions.Count);
            Assert.AreEqual(14, rGroupQuery.RootStructure.Atoms.Count);
            Assert.AreEqual(4, rGroupQuery.RootAttachmentPoints.Count);

            var rGroupQueryAtoms = rGroupQuery.GetAllRgroupQueryAtoms();

            Assert.AreEqual(4, rGroupQueryAtoms.Count);

            rGroupQueryAtoms = rGroupQuery.GetRgroupQueryAtoms(1);
            Assert.AreEqual(1, rGroupQueryAtoms.Count);

            foreach (var at in rGroupQuery.GetAllRgroupQueryAtoms())
            {
                if (at is PseudoAtom pseudo)
                {
                    Assert.IsTrue(RGroupQuery.IsValidRgroupQueryLabel(pseudo.Label));

                    // The label is "R" followed by the number; parse it culture-independently.
                    var rgroupNum = int.Parse(pseudo.Label.Substring(1), System.Globalization.CultureInfo.InvariantCulture);
                    Assert.IsTrue(rgroupNum == 1 || rgroupNum == 2 || rgroupNum == 11);
                    switch (rgroupNum)
                    {
                        case 1:
                        {
                            //Test: R1 has two attachment points, defined by AAL
                            var rootApo  = rGroupQuery.RootAttachmentPoints;
                            var apoBonds = rootApo[at];
                            Assert.AreEqual(2, apoBonds.Count);
                            Assert.AreEqual("N", apoBonds[1].GetOther(at).Symbol);
                            Assert.AreEqual("C", apoBonds[2].GetOther(at).Symbol);
                            //Test: Oxygens are the 2nd APO's for R1
                            var rList = rGroupQuery.RGroupDefinitions[1];
                            Assert.AreEqual(2, rList.RGroups.Count);
                            var rGroups = rList.RGroups;
                            Assert.AreEqual("O", rGroups[0].SecondAttachmentPoint.Symbol);
                            Assert.AreEqual("O", rGroups[1].SecondAttachmentPoint.Symbol);
                            Assert.IsFalse(rList.IsRestH);
                            break;
                        }

                        case 2:
                        {
                            RGroupList rList = rGroupQuery.RGroupDefinitions[2];
                            Assert.AreEqual(2, rList.RGroups.Count);
                            Assert.AreEqual("0,2", rList.Occurrence);
                            Assert.AreEqual(11, rList.RequiredRGroupNumber);
                            Assert.IsFalse(rList.IsRestH);
                            break;
                        }

                        case 11:
                        {
                            RGroupList rList = rGroupQuery.RGroupDefinitions[11];
                            Assert.AreEqual(1, rList.RGroups.Count);
                            Assert.AreEqual(0, rList.RequiredRGroupNumber);
                            Assert.IsTrue(rList.IsRestH);

                            var rGroups = rList.RGroups;
                            Assert.AreEqual("Pt", rGroups[0].FirstAttachmentPoint.Symbol);
                            Assert.IsNull(rGroups[0].SecondAttachmentPoint);
                            break;
                        }
                    }
                }
            }

            // Materialize once: the sequence is counted and then iterated.
            var configurations = rGroupQuery.GetAllConfigurations().ToList();

            Assert.AreEqual(12, configurations.Count);

            //Test restH values
            int countRestHForSmallestConfigurations = 0;

            foreach (var atc in configurations)
            {
                if (atc.Atoms.Count != 13)
                {
                    continue; // only the smallest configurations are inspected
                }
                foreach (var atom in atc.Atoms)
                {
                    if (atom.GetProperty<bool?>(CDKPropertyName.RestH) != null)
                    {
                        countRestHForSmallestConfigurations++;
                        if (atom.Symbol.Equals("P"))
                        {
                            Assert.IsTrue(atom.GetProperty<bool>(CDKPropertyName.RestH));
                        }
                    }
                }
            }
            Assert.AreEqual(6, countRestHForSmallestConfigurations);
        }