/// <summary>
/// Parses the RGFile available from the underlying input. Uses <see cref="MDLV2000Reader"/>
/// to parse the individual $CTAB blocks (the root structure and every R-group member).
/// </summary>
/// <param name="rGroupQuery">empty query to populate; its builder is used to create the atom containers</param>
/// <returns>the same <paramref name="rGroupQuery"/> instance, populated with the root structure,
/// the root attachment points and the R-group definitions</returns>
/// <exception cref="CDKException">the file does not follow the expected RGFile layout, ends prematurely,
/// references an R-group in $RGP that the root structure does not define, or an
/// <see cref="IOException"/>/<see cref="ArgumentException"/> occurred while parsing</exception>
private RGroupQuery ParseRGFile(RGroupQuery rGroupQuery)
{
    var defaultChemObjectBuilder = rGroupQuery.Builder;
    // lineCount and line are kept at method scope so the catch blocks can report where parsing stopped.
    int lineCount = 0;
    string line = "";

    /* Variable to capture the LOG Line(s) */
    var logicDefinitions = new Dictionary<int, RGroupLogic>();

    // Variable to capture the attachment order for Rgroups. Contains:
    // - pseudo atom (Rgroup)
    // - map with (integer, bond) meaning "bond" has attachment order "integer" (1,2,3) for the Rgroup
    // The order is based on the atom block, unless there is an AAL line for the pseudo atom.
    var attachmentPoints = new Dictionary<IAtom, IReadOnlyDictionary<int, IBond>>();

    try
    {
        // Process the Header block_________________________________________
        //__________________________________________________________________
        Trace.TraceInformation("Process the Header block");
        CheckLineBeginsWith(input.ReadLine(), "$MDL", ++lineCount);
        CheckLineBeginsWith(input.ReadLine(), "$MOL", ++lineCount);
        CheckLineBeginsWith(input.ReadLine(), "$HDR", ++lineCount);

        for (int i = 1; i <= 3; i++)
        {
            lineCount++;
            if (input.ReadLine() == null)
            {
                throw new CDKException("RGFile invalid, empty/null header line at #" + lineCount);
            }
            //optional: parse header info here (not implemented)
        }
        CheckLineBeginsWith(input.ReadLine(), "$END HDR", ++lineCount);

        //Process the root structure (scaffold)_____________________________
        //__________________________________________________________________
        Trace.TraceInformation("Process the root structure (scaffold)");
        CheckLineBeginsWith(input.ReadLine(), "$CTAB", ++lineCount);

        //Force header
        var rootText = new StringBuilder(RGroup.RootLabelKey + "\n\n\n");
        line = input.ReadLine();
        ++lineCount;
        while (line != null && !string.Equals(line, "$END CTAB", StringComparison.Ordinal))
        {
            rootText.Append(line).Append('\n');

            //LOG lines: Logic, Unsatisfied Sites, Range of Occurrence.
            if (line.StartsWith("M LOG", StringComparison.Ordinal))
            {
                var tokens = Strings.Tokenize(line);
                var log = new RGroupLogic();
                var rgroupNumber = int.Parse(tokens[3], NumberFormatInfo.InvariantInfo);
                log.rgoupNumberRequired = int.Parse(tokens[4], NumberFormatInfo.InvariantInfo);
                log.restH = string.Equals(tokens[5], "1", StringComparison.Ordinal);

                // Everything from the seventh token on is glued together (without separators)
                // to form the occurrence expression.
                var occurrence = new StringBuilder();
                for (int i = 6; i < tokens.Count; i++)
                {
                    occurrence.Append(tokens[i]);
                }
                log.occurence = occurrence.ToString();
                logicDefinitions[rgroupNumber] = log;
            }

            line = input.ReadLine();
            ++lineCount;
        }
        string rootStr = rootText.ToString();

        //Let MDL reader process $CTAB block of the root structure.
        var reader = new MDLV2000Reader(new StringReader(rootStr), ChemObjectReaderMode.Strict);
        var root = reader.Read(defaultChemObjectBuilder.NewAtomContainer());
        rGroupQuery.RootStructure = root;

        //Atom attachment order: parse AAL lines first
        using (var rootLinesReader = new StringReader(rootStr))
        {
            // 'line' is reused on purpose so a failure here is reported with the offending AAL line.
            while ((line = rootLinesReader.ReadLine()) != null)
            {
                if (!line.StartsWith("M AAL", StringComparison.Ordinal))
                {
                    continue;
                }

                var stAAL = Strings.Tokenize(line);
                var pos = int.Parse(stAAL[2], NumberFormatInfo.InvariantInfo);
                var rGroupAtom = root.Atoms[pos - 1];
                var aalBonds = new Dictionary<int, IBond>();
                // Tokens from index 4 on come in (partner atom number, attachment order) pairs.
                for (int i = 4; i < stAAL.Count; i += 2)
                {
                    pos = int.Parse(stAAL[i], NumberFormatInfo.InvariantInfo);
                    var partner = root.Atoms[pos - 1];
                    var bond = root.GetBond(rGroupAtom, partner);
                    var order = int.Parse(stAAL[i + 1], NumberFormatInfo.InvariantInfo);
                    aalBonds[order] = bond;
                    Trace.TraceInformation($"AAL {order} {((IPseudoAtom)rGroupAtom).Label}-{partner.Symbol}");
                }
                if (aalBonds.Count != 0)
                {
                    attachmentPoints[rGroupAtom] = aalBonds;
                }
            }
        }

        //Deal with remaining attachment points (non AAL)
        foreach (var atom in root.Atoms)
        {
            if (atom is IPseudoAtom rGroup)
            {
                if (rGroup.Label.StartsWithChar('R')
                    && !string.Equals(rGroup.Label, "R", StringComparison.Ordinal) // only numbered ones
                    && !attachmentPoints.ContainsKey(rGroup))
                {
                    //Order reflects the order of atoms in the Atom Block
                    int order = 0;
                    var defaultBonds = new Dictionary<int, IBond>();
                    foreach (var atom2 in root.Atoms)
                    {
                        if (atom.Equals(atom2))
                        {
                            continue;
                        }
                        foreach (var bond in root.Bonds)
                        {
                            if (bond.Contains(atom) && bond.Contains(atom2))
                            {
                                defaultBonds[++order] = bond;
                                Trace.TraceInformation($"Def {order} {rGroup.Label}-{atom2.Symbol}");
                                break;
                            }
                        }
                    }
                    if (defaultBonds.Count != 0)
                    {
                        attachmentPoints[rGroup] = defaultBonds;
                    }
                }
            }
        }

        //Done with attachment points
        rGroupQuery.RootAttachmentPoints = attachmentPoints;
        Trace.TraceInformation($"Attachm.points defined for {attachmentPoints.Count} R# atoms");

        //Process each Rgroup's $CTAB block(s)_____________________________
        //__________________________________________________________________

        //Set up the RgroupLists, one for each unique R# (# = 1..32 max)
        var rGroupDefinitions = new Dictionary<int, RGroupList>();
        foreach (var atom in root.Atoms)
        {
            if (atom is IPseudoAtom rGroup && RGroupQuery.IsValidRgroupQueryLabel(rGroup.Label))
            {
                var rgroupNum = int.Parse(rGroup.Label.Substring(1), NumberFormatInfo.InvariantInfo);
                if (rGroupDefinitions.ContainsKey(rgroupNum))
                {
                    continue; // several root atoms may carry the same R# label
                }

                Trace.TraceInformation($"Define Rgroup R{rgroupNum}");
                var rgroupList = new RGroupList(rgroupNum);
                // A LOG line is optional. The Dictionary indexer throws on a missing key instead of
                // returning null, so TryGetValue is required for the defaults below to be reachable.
                if (logicDefinitions.TryGetValue(rgroupNum, out var logic) && logic != null)
                {
                    rgroupList.IsRestH = logic.restH;
                    rgroupList.Occurrence = logic.occurence;
                    rgroupList.RequiredRGroupNumber = logic.rgoupNumberRequired;
                }
                else
                {
                    rgroupList.IsRestH = false;
                    rgroupList.Occurrence = ">0";
                    rgroupList.RequiredRGroupNumber = 0;
                }
                rgroupList.RGroups = new List<RGroup>();
                rGroupDefinitions[rgroupNum] = rgroupList;
            }
        }

        //Parse all $CTAB blocks per Rgroup (there can be more than one)
        line = input.ReadLine();
        ++lineCount;
        bool hasMoreRGP = true;
        while (hasMoreRGP)
        {
            CheckLineBeginsWith(line, "$RGP", lineCount);
            line = input.ReadLine();
            ++lineCount;
            Trace.TraceInformation("line for num is " + line);
            if (line == null)
            {
                throw new CDKException("RGFile invalid, unexpected end of file after $RGP at line #" + lineCount);
            }
            int rgroupNum = int.Parse(line.Trim(), NumberFormatInfo.InvariantInfo);
            line = input.ReadLine();
            ++lineCount;

            bool hasMoreCTAB = true;
            while (hasMoreCTAB)
            {
                CheckLineBeginsWith(line, "$CTAB", lineCount);
                var groupText = new StringBuilder(RGroup.MakeLabel(rgroupNum) + "\n\n\n");
                line = input.ReadLine();
                ++lineCount; // this read was previously not counted, skewing reported line numbers
                while (line != null && !line.StartsWith("$END CTAB", StringComparison.Ordinal))
                {
                    groupText.Append(line).Append('\n');
                    line = input.ReadLine();
                    ++lineCount;
                }
                var groupStr = groupText.ToString();
                reader = new MDLV2000Reader(new StringReader(groupStr), ChemObjectReaderMode.Strict);
                var group = reader.Read(defaultChemObjectBuilder.NewAtomContainer());
                var member = new RGroup { Group = group };

                //Parse the Rgroup's attachment points (APO)
                using (var groupLinesReader = new StringReader(groupStr))
                {
                    while ((line = groupLinesReader.ReadLine()) != null)
                    {
                        if (!line.StartsWith("M APO", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        var stAPO = Strings.Tokenize(line);
                        // Tokens from index 3 on come in (atom number, attachment point code) pairs.
                        for (int i = 3; i < stAPO.Count; i += 2)
                        {
                            var pos = int.Parse(stAPO[i], NumberFormatInfo.InvariantInfo);
                            var apo = int.Parse(stAPO[i + 1], NumberFormatInfo.InvariantInfo);
                            var at = group.Atoms[pos - 1];
                            switch (apo)
                            {
                                case 1:
                                    member.FirstAttachmentPoint = at;
                                    break;
                                case 2:
                                    member.SecondAttachmentPoint = at;
                                    break;
                                case 3:
                                    // code 3: the atom is both the first and the second attachment point
                                    member.FirstAttachmentPoint = at;
                                    member.SecondAttachmentPoint = at;
                                    break;
                            }
                        }
                    }
                }

                // TryGetValue rather than the indexer: a missing key must surface as the CDKException
                // below, not as a KeyNotFoundException.
                if (!rGroupDefinitions.TryGetValue(rgroupNum, out var rList) || rList == null)
                {
                    throw new CDKException("R" + rgroupNum + " not defined but referenced in $RGP.");
                }
                rList.RGroups.Add(member);

                line = input.ReadLine();
                ++lineCount;
                if (line == null)
                {
                    throw new CDKException("RGFile invalid, unexpected end of file inside $RGP block at line #" + lineCount);
                }
                if (line.StartsWith("$END RGP", StringComparison.Ordinal))
                {
                    Trace.TraceInformation("end of RGP block");
                    hasMoreCTAB = false;
                }
            }

            line = input.ReadLine();
            ++lineCount;
            if (line == null)
            {
                throw new CDKException("RGFile invalid, unexpected end of file, missing $END MOL at line #" + lineCount);
            }
            if (line.StartsWith("$END MOL", StringComparison.Ordinal))
            {
                hasMoreRGP = false;
            }
        }

        rGroupQuery.RGroupDefinitions = rGroupDefinitions;
        Trace.TraceInformation("Number of lines was " + lineCount);
        return rGroupQuery;
    }
    catch (CDKException exception)
    {
        string error = $"CDK Error while parsing line {lineCount}: {line} -> {exception.Message}";
        Trace.TraceError(error);
        Debug.WriteLine(exception);
        throw;
    }
    // Only I/O and argument errors are wrapped; every other exception type propagates unchanged.
    // NOTE(review): FormatException from int.Parse is not an ArgumentException and is therefore
    // not wrapped into a CDKException - confirm whether that is intended.
    catch (Exception exception) when (exception is IOException || exception is ArgumentException)
    {
        Console.Error.WriteLine(exception.StackTrace);
        var error = $"{exception.GetType()} Error while parsing line {lineCount}: {line} -> {exception.Message}";
        Trace.TraceError(error);
        Debug.WriteLine(exception);
        throw new CDKException(error, exception);
    }
}
// Reads the embedded resource "NCDK.Data.MDL.rgfile.2.mol" through RGroupQueryReader and checks
// the parsed R-group definitions (R1, R2, R11), the root attachment points, the number of
// generated configurations and the RestH properties on the smallest configurations.
public void TestRgroupQueryFile2()
{
    var filename = "NCDK.Data.MDL.rgfile.2.mol";
    Trace.TraceInformation("Testing: " + filename);
    var ins = ResourceLoader.GetAsStream(filename);
    var reader = new RGroupQueryReader(ins);
    RGroupQuery rGroupQuery;
    try
    {
        rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));
    }
    finally
    {
        // close the reader even when Read throws
        reader.Close();
    }

    Assert.IsNotNull(rGroupQuery);
    Assert.AreEqual(3, rGroupQuery.RGroupDefinitions.Count);
    Assert.AreEqual(14, rGroupQuery.RootStructure.Atoms.Count);
    Assert.AreEqual(4, rGroupQuery.RootAttachmentPoints.Count);

    var rGroupQueryAtoms = rGroupQuery.GetAllRgroupQueryAtoms();
    Assert.AreEqual(4, rGroupQueryAtoms.Count);

    rGroupQueryAtoms = rGroupQuery.GetRgroupQueryAtoms(1);
    Assert.AreEqual(1, rGroupQueryAtoms.Count);

    foreach (var at in rGroupQuery.GetAllRgroupQueryAtoms())
    {
        if (at is PseudoAtom pseudo)
        {
            Assert.IsTrue(RGroupQuery.IsValidRgroupQueryLabel(pseudo.Label));

            var rgroupNum = int.Parse(pseudo.Label.Substring(1));
            Assert.IsTrue(rgroupNum == 1 || rgroupNum == 2 || rgroupNum == 11);

            switch (rgroupNum)
            {
                case 1:
                    {
                        //Test: R1 has two attachment points, defined by AAL
                        var rootApo = rGroupQuery.RootAttachmentPoints;
                        var apoBonds = rootApo[at];
                        Assert.AreEqual(2, apoBonds.Count);
                        Assert.AreEqual("N", apoBonds[1].GetOther(at).Symbol);
                        Assert.AreEqual("C", apoBonds[2].GetOther(at).Symbol);

                        //Test: Oxygens are the 2nd APO's for R1
                        var rList = rGroupQuery.RGroupDefinitions[1];
                        Assert.AreEqual(2, rList.RGroups.Count);
                        var rGroups = rList.RGroups;
                        Assert.AreEqual("O", rGroups[0].SecondAttachmentPoint.Symbol);
                        Assert.AreEqual("O", rGroups[1].SecondAttachmentPoint.Symbol);
                        Assert.IsFalse(rList.IsRestH);
                    }
                    break;
                case 2:
                    {
                        RGroupList rList = rGroupQuery.RGroupDefinitions[2];
                        Assert.AreEqual(2, rList.RGroups.Count);
                        Assert.AreEqual("0,2", rList.Occurrence);
                        Assert.AreEqual(11, rList.RequiredRGroupNumber);
                        Assert.IsFalse(rList.IsRestH);
                    }
                    break;
                case 11:
                    {
                        RGroupList rList = rGroupQuery.RGroupDefinitions[11];
                        Assert.AreEqual(1, rList.RGroups.Count);
                        Assert.AreEqual(0, rList.RequiredRGroupNumber);
                        Assert.IsTrue(rList.IsRestH);

                        var rGroups = rList.RGroups;
                        Assert.AreEqual("Pt", rGroups[0].FirstAttachmentPoint.Symbol);
                        Assert.IsNull(rGroups[0].SecondAttachmentPoint);
                    }
                    break;
            }
        }
    }

    // Materialize once: the configurations are both counted and iterated below.
    var configurations = rGroupQuery.GetAllConfigurations().ToList();
    Assert.AreEqual(12, configurations.Count);

    //Test restH values
    int countRestHForSmallestConfigurations = 0;
    foreach (var atc in configurations)
    {
        if (atc.Atoms.Count == 13)
        {
            // smallest configuration
            foreach (var atom in atc.Atoms)
            {
                if (atom.GetProperty<bool?>(CDKPropertyName.RestH) != null)
                {
                    countRestHForSmallestConfigurations++;
                    if (atom.Symbol.Equals("P"))
                    {
                        Assert.IsTrue(atom.GetProperty<bool>(CDKPropertyName.RestH));
                    }
                }
            }
        }
    }
    Assert.AreEqual(6, countRestHForSmallestConfigurations);
}
// Reads the embedded resource "NCDK.Data.MDL.rgfile.1.mol" through RGroupQueryReader and checks
// the single R1 definition: its root attachment bond, occurrence string, the attachment points
// of its three members, the number of configurations and the RestH flag on phosphorus.
public void TestRgroupQueryFile1()
{
    var filename = "NCDK.Data.MDL.rgfile.1.mol";
    Trace.TraceInformation("Testing: " + filename);
    var ins = ResourceLoader.GetAsStream(filename);
    RGroupQueryReader reader = new RGroupQueryReader(ins);
    RGroupQuery rGroupQuery;
    try
    {
        rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));
    }
    finally
    {
        // close the reader even when Read throws
        reader.Close();
    }

    Assert.IsNotNull(rGroupQuery);
    Assert.AreEqual(1, rGroupQuery.RGroupDefinitions.Count);
    Assert.AreEqual(7, rGroupQuery.RootStructure.Atoms.Count);

    foreach (var at in rGroupQuery.GetAllRgroupQueryAtoms())
    {
        if (at is PseudoAtom pseudo)
        {
            Assert.AreEqual("R1", pseudo.Label);

            var rootApo = rGroupQuery.RootAttachmentPoints;
            var apoBonds = rootApo[at];
            Assert.AreEqual(1, apoBonds.Count);

            // Assert that the root attachment is the bond between R1 and P
            foreach (var bond in rGroupQuery.RootStructure.Bonds)
            {
                if (bond.Contains(at))
                {
                    Assert.AreEqual(bond, apoBonds[1]);
                    foreach (var atInApo in bond.Atoms)
                    {
                        Assert.IsTrue(atInApo.Symbol.Equals("R") || atInApo.Symbol.Equals("P"));
                    }
                }
            }
        }
    }

    int firstKey = rGroupQuery.RGroupDefinitions.Keys.First();
    Assert.AreEqual(1, firstKey);

    RGroupList rList = rGroupQuery.RGroupDefinitions[firstKey];
    Assert.AreEqual("0,1-3", rList.Occurrence);

    var rGroups = rList.RGroups;
    Assert.AreEqual("N", rGroups[0].FirstAttachmentPoint.Symbol);
    Assert.AreEqual("O", rGroups[1].FirstAttachmentPoint.Symbol);
    Assert.AreEqual("S", rGroups[2].FirstAttachmentPoint.Symbol);

    Assert.IsNull(rGroups[0].SecondAttachmentPoint);
    Assert.IsNull(rGroups[1].SecondAttachmentPoint);
    Assert.IsNull(rGroups[2].SecondAttachmentPoint);

    // Materialize once: the configurations are both counted and iterated below.
    var configurations = rGroupQuery.GetAllConfigurations().ToList();
    Assert.AreEqual(4, configurations.Count);

    //IsRestH is set to true for R1, so with zero substitutes, the phosphor should get the restH flag set to true.
    bool restH_Identified = false;
    foreach (var atc in configurations)
    {
        if (atc.Atoms.Count == 6)
        {
            foreach (var atom in atc.Atoms)
            {
                if (atom.Symbol.Equals("P"))
                {
                    Assert.IsNotNull(atom.GetProperty<bool?>(CDKPropertyName.RestH));
                    Assert.IsTrue(atom.GetProperty<bool>(CDKPropertyName.RestH));
                    restH_Identified = true;
                }
            }
        }
    }
    Assert.IsTrue(restH_Identified);
}
/// <summary>
/// The actual writing of the output: builds the header and root $CTAB block (including the
/// LOG and, where needed, AAL lines) followed by one $RGP block per R-group definition that
/// has members, then writes both parts to the underlying writer and flushes it.
/// </summary>
/// <param name="obj">the object to write; must be an <see cref="IRGroupQuery"/></param>
/// <exception cref="CDKException">could not write RGroup query, or <paramref name="obj"/>
/// is not an <see cref="IRGroupQuery"/></exception>
public override void Write(IChemObject obj)
{
    if (!(obj is IRGroupQuery rGroupQuery))
    {
        throw new CDKException("Only IRGroupQuery input is accepted.");
    }

    try
    {
        string now = DateTime.UtcNow.ToString("MMddyyHHmm", DateTimeFormatInfo.InvariantInfo);
        IAtomContainer rootAtc = rGroupQuery.RootStructure;

        //Construct header
        var rootBlock = new StringBuilder();
        string header = "$MDL REV 1 " + now + LSEP
            + "$MOL" + LSEP
            + "$HDR" + LSEP
            + " Rgroup query file (RGFile)" + LSEP
            + " CDK " + now + "2D" + LSEP
            + LSEP
            + "$END HDR" + LSEP
            + "$CTAB";
        rootBlock.Append(header).Append(LSEP);

        //Construct the root structure, the scaffold
        // The CTAB's own "M END" terminator is stripped here; it is re-added after the LOG/AAL lines.
        string rootCTAB = GetCTAB(rootAtc);
        rootCTAB = rootCTAB.Replace(LSEP + "M END" + LSEP, "");
        rootBlock.Append(rootCTAB).Append(LSEP);

        //Write the root's LOG lines
        foreach (var rgrpNum in rGroupQuery.RGroupDefinitions.Keys)
        {
            RGroupList rgList = rGroupQuery.RGroupDefinitions[rgrpNum];
            int restH = rgList.IsRestH ? 1 : 0;
            string logLine = "M LOG"
                + MDLV2000Writer.FormatMDLInt(1, 3)
                + MDLV2000Writer.FormatMDLInt(rgrpNum, 4)
                + MDLV2000Writer.FormatMDLInt(rgList.RequiredRGroupNumber, 4)
                + MDLV2000Writer.FormatMDLInt(restH, 4)
                + " " + rgList.Occurrence;
            rootBlock.Append(logLine).Append(LSEP);
        }

        //AAL lines are optional, they are needed for R-atoms with multiple bonds to the root
        //for which the order of the attachment points can not be implicitly derived
        //from the order in the atom block. See CT spec for more on that.
        foreach (var rgroupAtom in rGroupQuery.RootAttachmentPoints.Keys)
        {
            var rApo = rGroupQuery.RootAttachmentPoints[rgroupAtom];
            if (rApo.Count <= 1)
            {
                continue;
            }

            // Walk the attachment points in order 1, 2, ... and check that the partner atoms
            // appear at non-decreasing positions in the atom block.
            int prevPos = -1;
            int apoIdx = 1;
            bool implicitlyOrdered = true;
            while (rApo.ContainsKey(apoIdx) && implicitlyOrdered)
            {
                IAtom partner = rApo[apoIdx].GetOther(rgroupAtom);
                for (int atIdx = 0; atIdx < rootAtc.Atoms.Count; atIdx++)
                {
                    if (rootAtc.Atoms[atIdx].Equals(partner))
                    {
                        if (atIdx < prevPos)
                        {
                            implicitlyOrdered = false;
                        }
                        prevPos = atIdx;
                        break;
                    }
                }
                apoIdx++;
            }

            if (implicitlyOrdered)
            {
                continue;
            }

            var aalLine = new StringBuilder("M AAL");
            for (int atIdx = 0; atIdx < rootAtc.Atoms.Count; atIdx++)
            {
                if (!rootAtc.Atoms[atIdx].Equals(rgroupAtom))
                {
                    continue;
                }

                // R-atom number and number of attachment entries, then (partner, order) pairs
                aalLine.Append(MDLV2000Writer.FormatMDLInt(atIdx + 1, 4));
                aalLine.Append(MDLV2000Writer.FormatMDLInt(rApo.Count, 3));

                apoIdx = 1;
                while (rApo.ContainsKey(apoIdx))
                {
                    IAtom partner = rApo[apoIdx].GetOther(rgroupAtom);
                    for (int a = 0; a < rootAtc.Atoms.Count; a++)
                    {
                        if (rootAtc.Atoms[a].Equals(partner))
                        {
                            aalLine.Append(MDLV2000Writer.FormatMDLInt(a + 1, 4));
                            aalLine.Append(MDLV2000Writer.FormatMDLInt(apoIdx, 4));
                        }
                    }
                    apoIdx++;
                }
            }
            rootBlock.Append(aalLine.ToString()).Append(LSEP);
        }

        rootBlock.Append("M END").Append(LSEP).Append("$END CTAB").Append(LSEP);

        //Construct each R-group block
        var rgpBlock = new StringBuilder();
        foreach (var rgrpNum in rGroupQuery.RGroupDefinitions.Keys)
        {
            var rgrpList = rGroupQuery.RGroupDefinitions[rgrpNum].RGroups;
            if (rgrpList == null || rgrpList.Count == 0)
            {
                continue; // definitions without members get no $RGP block
            }

            rgpBlock.Append("$RGP").Append(LSEP);
            rgpBlock.Append(MDLV2000Writer.FormatMDLInt(rgrpNum, 4)).Append(LSEP);

            foreach (var rgroup in rgrpList)
            {
                //CTAB block
                rgpBlock.Append("$CTAB").Append(LSEP);
                string ctab = GetCTAB(rgroup.Group);
                ctab = ctab.Replace(LSEP + "M END" + LSEP, "");
                rgpBlock.Append(ctab).Append(LSEP);

                //The APO line
                IAtom firstAttachmentPoint = rgroup.FirstAttachmentPoint;
                IAtom secondAttachmentPoint = rgroup.SecondAttachmentPoint;
                int apoCount = 0;
                if (firstAttachmentPoint != null)
                {
                    var apoLine = new StringBuilder();
                    bool sameAtomForBoth = secondAttachmentPoint != null
                        && secondAttachmentPoint.Equals(firstAttachmentPoint);

                    for (int atIdx = 0; atIdx < rgroup.Group.Atoms.Count; atIdx++)
                    {
                        if (rgroup.Group.Atoms[atIdx].Equals(firstAttachmentPoint))
                        {
                            apoLine.Append(MDLV2000Writer.FormatMDLInt(atIdx + 1, 4));
                            apoCount++;
                            // code 3 marks an atom that is both first and second attachment point
                            apoLine.Append(MDLV2000Writer.FormatMDLInt(sameAtomForBoth ? 3 : 1, 4));
                        }
                    }

                    if (secondAttachmentPoint != null && !sameAtomForBoth)
                    {
                        for (int atIdx = 0; atIdx < rgroup.Group.Atoms.Count; atIdx++)
                        {
                            if (rgroup.Group.Atoms[atIdx].Equals(secondAttachmentPoint))
                            {
                                apoCount++;
                                apoLine.Append(MDLV2000Writer.FormatMDLInt(atIdx + 1, 4));
                                apoLine.Append(MDLV2000Writer.FormatMDLInt(2, 4));
                            }
                        }
                    }

                    if (apoCount > 0)
                    {
                        // The entry count is only known now, so the line prefix is inserted last.
                        apoLine.Insert(0, "M APO" + MDLV2000Writer.FormatMDLInt(apoCount, 3));
                        rgpBlock.Append(apoLine).Append(LSEP);
                    }
                }

                rgpBlock.Append("M END").Append(LSEP);
                rgpBlock.Append("$END CTAB").Append(LSEP);
            }

            rgpBlock.Append("$END RGP").Append(LSEP);
        }
        rgpBlock.Append("$END MOL").Append(LSEP);

        writer.Write(rootBlock.ToString());
        writer.Write(rgpBlock.ToString());
        writer.Flush();
    }
    catch (IOException e)
    {
        Console.Error.WriteLine(e.StackTrace);
        // Keep the original exception as inner exception so the cause is not lost.
        throw new CDKException("Unexpected exception when writing RGFile" + LSEP + e.Message, e);
    }
}