/// <summary>
/// Round-trips an RGFile: reads the resource named <paramref name="file"/> with
/// <c>RGroupQueryReader</c> and serialises the resulting query again with
/// <c>RGroupQueryWriter</c>.
/// </summary>
/// <param name="file">resource name passed to <c>ResourceLoader.GetAsStream</c></param>
/// <returns>the text produced by the writer</returns>
private string Recreate(string file)
{
    using (var sw = new StringWriter())
    {
        var rgw = new RGroupQueryWriter(sw);
        var ins = ResourceLoader.GetAsStream(file);
        var reader = new RGroupQueryReader(ins);
        try
        {
            var rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery());
            rgw.Write(rGroupQuery);
            return sw.ToString();
        }
        finally
        {
            // Release the reader even when reading or writing throws.
            reader.Close();
        }
    }
}
// Reads rgfile.3.mol and checks the root structure, the AAL-defined attachment
// order of R1 and the number of numbered R-group query atoms.
public void TestRgroupQueryFile3()
{
    var filename = "NCDK.Data.MDL.rgfile.3.mol";
    Trace.TraceInformation("Testing: " + filename);
    var ins = ResourceLoader.GetAsStream(filename);
    var reader = new RGroupQueryReader(ins);
    RGroupQuery rGroupQuery;
    try
    {
        rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));
    }
    finally
    {
        // Close the reader even when reading fails.
        reader.Close();
    }

    // Expected value goes first so that failure messages read correctly.
    Assert.IsNotNull(rGroupQuery);
    Assert.AreEqual(1, rGroupQuery.RGroupDefinitions.Count);
    Assert.AreEqual(10, rGroupQuery.RootStructure.Atoms.Count);
    Assert.AreEqual(2, rGroupQuery.RootAttachmentPoints.Count);
    Assert.AreEqual(8, rGroupQuery.GetAllConfigurations().Count());

    //Test correctness AAL lines
    foreach (var at in rGroupQuery.GetRgroupQueryAtoms(1))
    {
        if (at is PseudoAtom pseudo)
        {
            Assert.AreEqual("R1", pseudo.Label);

            var apoBonds = rGroupQuery.RootAttachmentPoints[at];
            Assert.AreEqual(2, apoBonds.Count);

            var boundAtom1 = apoBonds[1].GetOther(at);
            Assert.IsTrue(boundAtom1.Symbol.Equals("Te") || boundAtom1.Symbol.Equals("S"));

            var boundAtom2 = apoBonds[2].GetOther(at);
            Assert.IsTrue(boundAtom2.Symbol.Equals("Po") || boundAtom2.Symbol.Equals("O"));
        }
    }

    // Test that there only two Rgroup query atoms (R#). The third R is a
    // pseudo atom, but because it is not numbered it is not part of any
    // query condition.
    var allrGroupQueryAtoms = rGroupQuery.GetAllRgroupQueryAtoms();
    Assert.AreEqual(2, allrGroupQueryAtoms.Count);
}
/// <summary>
/// Parses the RGFile available on <c>input</c> into <paramref name="rGroupQuery"/>.
/// Each $CTAB block (the root structure and every R-group member) is re-assembled
/// as text and handed to <see cref="MDLV2000Reader"/>.
/// </summary>
/// <param name="rGroupQuery">empty query to populate; its <c>Builder</c> creates the atom containers</param>
/// <returns>the same <paramref name="rGroupQuery"/> instance with root structure,
/// root attachment points and R-group definitions set</returns>
/// <exception cref="CDKException">the file is malformed or truncated, or an I/O,
/// argument or number-format error occurred while parsing; the message carries the line number</exception>
private RGroupQuery ParseRGFile(RGroupQuery rGroupQuery)
{
    var defaultChemObjectBuilder = rGroupQuery.Builder;
    int lineCount = 0;
    string line = "";

    /* Variable to capture the LOG Line(s) */
    var logicDefinitions = new Dictionary<int, RGroupLogic>();

    // Captures the attachment order for R-groups: pseudo atom (R-group) -> map of
    // (order, bond), meaning "bond" has attachment order 1, 2 or 3 for that R-group.
    // The order follows the atom block unless there is an AAL line for the pseudo atom.
    var attachmentPoints = new Dictionary<IAtom, IReadOnlyDictionary<int, IBond>>();

    try
    {
        // ---- Header block ------------------------------------------------
        Trace.TraceInformation("Process the Header block");
        CheckLineBeginsWith(input.ReadLine(), "$MDL", ++lineCount);
        CheckLineBeginsWith(input.ReadLine(), "$MOL", ++lineCount);
        CheckLineBeginsWith(input.ReadLine(), "$HDR", ++lineCount);
        for (int i = 1; i <= 3; i++)
        {
            lineCount++;
            if (input.ReadLine() == null)
            {
                throw new CDKException("RGFile invalid, empty/null header line at #" + lineCount);
            }
            //optional: parse header info here (not implemented)
        }
        CheckLineBeginsWith(input.ReadLine(), "$END HDR", ++lineCount);

        // ---- Root structure (scaffold) -----------------------------------
        Trace.TraceInformation("Process the root structure (scaffold)");
        CheckLineBeginsWith(input.ReadLine(), "$CTAB", ++lineCount);
        //Force header
        var rootBlock = new StringBuilder(RGroup.RootLabelKey + "\n\n\n");
        line = input.ReadLine();
        ++lineCount;
        while (line != null && !string.Equals(line, "$END CTAB", StringComparison.Ordinal))
        {
            rootBlock.Append(line).Append('\n');

            // LOG lines: Logic, Unsatisfied Sites, Range of Occurrence.
            // V2000 property lines are written as 'M', two spaces, then the 3-letter code.
            if (line.StartsWith("M  LOG", StringComparison.Ordinal))
            {
                var tokens = Strings.Tokenize(line);
                var log = new RGroupLogic();
                var rgroupNumber = int.Parse(tokens[3], NumberFormatInfo.InvariantInfo);
                log.rgoupNumberRequired = int.Parse(tokens[4], NumberFormatInfo.InvariantInfo);
                log.restH = string.Equals(tokens[5], "1", StringComparison.Ordinal);

                // Everything after the fifth token is the occurrence expression.
                var occurrence = new StringBuilder();
                for (int i = 6; i < tokens.Count; i++)
                {
                    occurrence.Append(tokens[i]);
                }
                log.occurence = occurrence.ToString();
                logicDefinitions[rgroupNumber] = log;
            }

            line = input.ReadLine();
            ++lineCount;
        }
        string rootStr = rootBlock.ToString();

        //Let MDL reader process $CTAB block of the root structure.
        var rootReader = new MDLV2000Reader(new StringReader(rootStr), ChemObjectReaderMode.Strict);
        var root = rootReader.Read(defaultChemObjectBuilder.NewAtomContainer());
        rGroupQuery.RootStructure = root;

        //Atom attachment order: parse AAL lines first
        using (var rootLinesReader = new StringReader(rootStr))
        {
            while ((line = rootLinesReader.ReadLine()) != null)
            {
                if (!line.StartsWith("M  AAL", StringComparison.Ordinal))
                {
                    continue;
                }

                var stAAL = Strings.Tokenize(line);
                var pos = int.Parse(stAAL[2], NumberFormatInfo.InvariantInfo);
                var aalAtom = root.Atoms[pos - 1];
                var bondMap = new Dictionary<int, IBond>();
                // Tokens from index 4 on come in (partner atom number, attachment order) pairs.
                for (int i = 4; i < stAAL.Count; i += 2)
                {
                    pos = int.Parse(stAAL[i], NumberFormatInfo.InvariantInfo);
                    var partner = root.Atoms[pos - 1];
                    var bond = root.GetBond(aalAtom, partner);
                    var order = int.Parse(stAAL[i + 1], NumberFormatInfo.InvariantInfo);
                    bondMap[order] = bond;
                    Trace.TraceInformation($"AAL {order} {((IPseudoAtom)aalAtom).Label}-{partner.Symbol}");
                }
                if (bondMap.Count != 0)
                {
                    attachmentPoints[aalAtom] = bondMap;
                }
            }
        }

        //Deal with remaining attachment points (non AAL)
        foreach (var atom in root.Atoms)
        {
            if (!(atom is IPseudoAtom pseudo))
            {
                continue;
            }
            // Only numbered R-groups that did not get an AAL line.
            if (!pseudo.Label.StartsWithChar('R')
                || pseudo.Label.Equals("R")
                || attachmentPoints.ContainsKey(pseudo))
            {
                continue;
            }

            //Order reflects the order of atoms in the Atom Block
            int order = 0;
            var bondMap = new Dictionary<int, IBond>();
            foreach (var atom2 in root.Atoms)
            {
                if (atom.Equals(atom2))
                {
                    continue;
                }
                foreach (var bond in root.Bonds)
                {
                    if (bond.Contains(atom) && bond.Contains(atom2))
                    {
                        bondMap[++order] = bond;
                        Trace.TraceInformation($"Def {order} {pseudo.Label}-{atom2.Symbol}");
                        break;
                    }
                }
            }
            if (bondMap.Count != 0)
            {
                attachmentPoints[pseudo] = bondMap;
            }
        }

        //Done with attachment points
        rGroupQuery.RootAttachmentPoints = attachmentPoints;
        Trace.TraceInformation($"Attachm.points defined for {attachmentPoints.Count} R# atoms");

        // ---- R-group $CTAB block(s) --------------------------------------
        //Set up the RgroupLists, one for each unique R# (# = 1..32 max)
        var rGroupDefinitions = new Dictionary<int, RGroupList>();
        foreach (var atom in root.Atoms)
        {
            if (!(atom is IPseudoAtom pseudo) || !RGroupQuery.IsValidRgroupQueryLabel(pseudo.Label))
            {
                continue;
            }

            var rgroupNum = int.Parse(pseudo.Label.Substring(1), NumberFormatInfo.InvariantInfo);
            if (rGroupDefinitions.ContainsKey(rgroupNum))
            {
                continue;
            }

            var rgroupList = new RGroupList(rgroupNum);
            Trace.TraceInformation($"Define Rgroup R{rgroupNum}");
            // The Dictionary indexer throws on a missing key, so use TryGetValue to be
            // able to fall back to the defaults when the file has no LOG line for this R#.
            if (logicDefinitions.TryGetValue(rgroupNum, out var logic) && logic != null)
            {
                rgroupList.IsRestH = logic.restH;
                rgroupList.Occurrence = logic.occurence;
                rgroupList.RequiredRGroupNumber = logic.rgoupNumberRequired;
            }
            else
            {
                rgroupList.IsRestH = false;
                rgroupList.Occurrence = ">0";
                rgroupList.RequiredRGroupNumber = 0;
            }
            rgroupList.RGroups = new List<RGroup>();
            rGroupDefinitions[rgroupNum] = rgroupList;
        }

        //Parse all $CTAB blocks per Rgroup (there can be more than one)
        line = input.ReadLine();
        ++lineCount;
        bool hasMoreRGP = true;
        while (hasMoreRGP)
        {
            CheckLineBeginsWith(line, "$RGP", lineCount);
            line = input.ReadLine();
            ++lineCount;
            Trace.TraceInformation("line for num is " + line);
            if (line == null)
            {
                throw new CDKException("RGFile invalid, unexpected end of file at #" + lineCount);
            }
            int rgroupNum = int.Parse(line.Trim(), NumberFormatInfo.InvariantInfo);
            line = input.ReadLine();
            ++lineCount;

            bool hasMoreCTAB = true;
            while (hasMoreCTAB)
            {
                CheckLineBeginsWith(line, "$CTAB", lineCount);
                var groupBlock = new StringBuilder(RGroup.MakeLabel(rgroupNum) + "\n\n\n");
                line = input.ReadLine();
                ++lineCount; // keep the reported line number in step with every read
                while (line != null && !line.StartsWith("$END CTAB", StringComparison.Ordinal))
                {
                    groupBlock.Append(line).Append('\n');
                    line = input.ReadLine();
                    ++lineCount;
                }

                var groupStr = groupBlock.ToString();
                var groupReader = new MDLV2000Reader(new StringReader(groupStr), ChemObjectReaderMode.Strict);
                var group = groupReader.Read(defaultChemObjectBuilder.NewAtomContainer());
                var member = new RGroup { Group = group };

                //Parse the Rgroup's attachment points (APO)
                using (var groupLinesReader = new StringReader(groupStr))
                {
                    while ((line = groupLinesReader.ReadLine()) != null)
                    {
                        if (!line.StartsWith("M  APO", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        var stAPO = Strings.Tokenize(line);
                        // Tokens from index 3 on come in (atom number, attachment point code) pairs.
                        for (int i = 3; i < stAPO.Count; i += 2)
                        {
                            var pos = int.Parse(stAPO[i], NumberFormatInfo.InvariantInfo);
                            var apo = int.Parse(stAPO[i + 1], NumberFormatInfo.InvariantInfo);
                            var at = group.Atoms[pos - 1];
                            switch (apo)
                            {
                                case 1:
                                    member.FirstAttachmentPoint = at;
                                    break;
                                case 2:
                                    member.SecondAttachmentPoint = at;
                                    break;
                                case 3:
                                    // Code 3: the atom is both first and second attachment point.
                                    member.FirstAttachmentPoint = at;
                                    member.SecondAttachmentPoint = at;
                                    break;
                            }
                        }
                    }
                }

                // TryGetValue instead of the indexer: a missing key must surface as the
                // CDKException below, not as a KeyNotFoundException.
                if (!rGroupDefinitions.TryGetValue(rgroupNum, out var rList) || rList == null)
                {
                    throw new CDKException("R" + rgroupNum + " not defined but referenced in $RGP.");
                }
                rList.RGroups.Add(member);

                line = input.ReadLine();
                ++lineCount;
                if (line == null)
                {
                    throw new CDKException("RGFile invalid, unexpected end of file at #" + lineCount);
                }
                if (line.StartsWith("$END RGP", StringComparison.Ordinal))
                {
                    Trace.TraceInformation("end of RGP block");
                    hasMoreCTAB = false;
                }
            }

            line = input.ReadLine();
            ++lineCount;
            if (line == null)
            {
                throw new CDKException("RGFile invalid, unexpected end of file at #" + lineCount);
            }
            if (line.StartsWith("$END MOL", StringComparison.Ordinal))
            {
                hasMoreRGP = false;
            }
        }

        rGroupQuery.RGroupDefinitions = rGroupDefinitions;
        Trace.TraceInformation("Number of lines was " + lineCount);
        return rGroupQuery;
    }
    catch (CDKException exception)
    {
        string error = $"CDK Error while parsing line {lineCount}: {line} -> {exception.Message}";
        Trace.TraceError(error);
        Debug.WriteLine(exception);
        throw;
    }
    catch (Exception exception) when (exception is IOException
                                      || exception is ArgumentException
                                      || exception is FormatException
                                      || exception is OverflowException)
    {
        // int.Parse reports malformed numbers as FormatException/OverflowException;
        // wrap those too so that the caller gets the line context.
        Console.Error.WriteLine(exception.StackTrace);
        var error = $"{exception.GetType()}Error while parsing line {lineCount}: {line} -> {exception.Message}";
        Trace.TraceError(error);
        Debug.WriteLine(exception);
        throw new CDKException(error, exception);
    }
}
// Reads rgfile.1.mol and checks the single R1 definition: its root attachment
// bond, occurrence string, member attachment points, the number of
// configurations and the RestH flag on phosphorus.
public void TestRgroupQueryFile1()
{
    var filename = "NCDK.Data.MDL.rgfile.1.mol";
    Trace.TraceInformation("Testing: " + filename);
    var ins = ResourceLoader.GetAsStream(filename);
    var reader = new RGroupQueryReader(ins);
    RGroupQuery rGroupQuery;
    try
    {
        rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));
    }
    finally
    {
        // Close the reader even when reading fails.
        reader.Close();
    }

    // Expected value goes first so that failure messages read correctly.
    Assert.IsNotNull(rGroupQuery);
    Assert.AreEqual(1, rGroupQuery.RGroupDefinitions.Count);
    Assert.AreEqual(7, rGroupQuery.RootStructure.Atoms.Count);

    foreach (var at in rGroupQuery.GetAllRgroupQueryAtoms())
    {
        if (at is PseudoAtom pseudo)
        {
            Assert.AreEqual("R1", pseudo.Label);

            var rootApo = rGroupQuery.RootAttachmentPoints;
            var apoBonds = rootApo[at];
            Assert.AreEqual(1, apoBonds.Count);

            // Assert that the root attachment is the bond between R1 and P
            foreach (var bond in rGroupQuery.RootStructure.Bonds)
            {
                if (bond.Contains(at))
                {
                    Assert.AreEqual(bond, apoBonds[1]);
                    foreach (var atInApo in bond.Atoms)
                    {
                        Assert.IsTrue(atInApo.Symbol.Equals("R") || atInApo.Symbol.Equals("P"));
                    }
                }
            }
        }
    }

    int firstKey = rGroupQuery.RGroupDefinitions.Keys.First();
    Assert.AreEqual(1, firstKey);

    RGroupList rList = rGroupQuery.RGroupDefinitions[firstKey];
    Assert.AreEqual("0,1-3", rList.Occurrence);

    var rGroups = rList.RGroups;
    Assert.AreEqual("N", rGroups[0].FirstAttachmentPoint.Symbol);
    Assert.AreEqual("O", rGroups[1].FirstAttachmentPoint.Symbol);
    Assert.AreEqual("S", rGroups[2].FirstAttachmentPoint.Symbol);
    Assert.IsNull(rGroups[0].SecondAttachmentPoint);
    Assert.IsNull(rGroups[1].SecondAttachmentPoint);
    Assert.IsNull(rGroups[2].SecondAttachmentPoint);

    var configurations = rGroupQuery.GetAllConfigurations();
    Assert.AreEqual(4, configurations.Count());

    //IsRestH is set to true for R1, so with zero substitutes, the phosphor should get the restH flag set to true.
    bool restH_Identified = false;
    foreach (var atc in configurations)
    {
        if (atc.Atoms.Count != 6)
        {
            continue;
        }
        foreach (var atom in atc.Atoms)
        {
            if (atom.Symbol.Equals("P"))
            {
                Assert.IsNotNull(atom.GetProperty<bool?>(CDKPropertyName.RestH));
                Assert.AreEqual(true, atom.GetProperty<bool>(CDKPropertyName.RestH));
                restH_Identified = true;
            }
        }
    }
    Assert.IsTrue(restH_Identified);
}
// Reads rgfile.2.mol and checks the three R-group definitions (R1, R2, R11),
// their attachment points and logic, the number of configurations and the
// RestH flags in the smallest configurations.
public void TestRgroupQueryFile2()
{
    var filename = "NCDK.Data.MDL.rgfile.2.mol";
    Trace.TraceInformation("Testing: " + filename);
    var ins = ResourceLoader.GetAsStream(filename);
    var reader = new RGroupQueryReader(ins);
    RGroupQuery rGroupQuery;
    try
    {
        rGroupQuery = (RGroupQuery)reader.Read(new RGroupQuery(ChemObjectBuilder.Instance));
    }
    finally
    {
        // Close the reader even when reading fails.
        reader.Close();
    }

    // Expected value goes first so that failure messages read correctly.
    Assert.IsNotNull(rGroupQuery);
    Assert.AreEqual(3, rGroupQuery.RGroupDefinitions.Count);
    Assert.AreEqual(14, rGroupQuery.RootStructure.Atoms.Count);
    Assert.AreEqual(4, rGroupQuery.RootAttachmentPoints.Count);

    var rGroupQueryAtoms = rGroupQuery.GetAllRgroupQueryAtoms();
    Assert.AreEqual(4, rGroupQueryAtoms.Count);

    rGroupQueryAtoms = rGroupQuery.GetRgroupQueryAtoms(1);
    Assert.AreEqual(1, rGroupQueryAtoms.Count);

    foreach (var at in rGroupQuery.GetAllRgroupQueryAtoms())
    {
        if (at is PseudoAtom pseudo)
        {
            Assert.IsTrue(RGroupQuery.IsValidRgroupQueryLabel(pseudo.Label));

            var rgroupNum = int.Parse(pseudo.Label.Substring(1));
            Assert.IsTrue(rgroupNum == 1 || rgroupNum == 2 || rgroupNum == 11);

            switch (rgroupNum)
            {
                case 1:
                    {
                        //Test: R1 has two attachment points, defined by AAL
                        var rootApo = rGroupQuery.RootAttachmentPoints;
                        var apoBonds = rootApo[at];
                        Assert.AreEqual(2, apoBonds.Count);
                        Assert.AreEqual("N", apoBonds[1].GetOther(at).Symbol);
                        Assert.AreEqual("C", apoBonds[2].GetOther(at).Symbol);

                        //Test: Oxygens are the 2nd APO's for R1
                        var rList = rGroupQuery.RGroupDefinitions[1];
                        Assert.AreEqual(2, rList.RGroups.Count);
                        var rGroups = rList.RGroups;
                        Assert.AreEqual("O", rGroups[0].SecondAttachmentPoint.Symbol);
                        Assert.AreEqual("O", rGroups[1].SecondAttachmentPoint.Symbol);
                        Assert.IsFalse(rList.IsRestH);
                    }
                    break;
                case 2:
                    {
                        RGroupList rList = rGroupQuery.RGroupDefinitions[2];
                        Assert.AreEqual(2, rList.RGroups.Count);
                        Assert.AreEqual("0,2", rList.Occurrence);
                        Assert.AreEqual(11, rList.RequiredRGroupNumber);
                        Assert.IsFalse(rList.IsRestH);
                    }
                    break;
                case 11:
                    {
                        RGroupList rList = rGroupQuery.RGroupDefinitions[11];
                        Assert.AreEqual(1, rList.RGroups.Count);
                        Assert.AreEqual(0, rList.RequiredRGroupNumber);
                        Assert.IsTrue(rList.IsRestH);

                        var rGroups = rList.RGroups;
                        Assert.AreEqual("Pt", rGroups[0].FirstAttachmentPoint.Symbol);
                        Assert.IsNull(rGroups[0].SecondAttachmentPoint);
                    }
                    break;
            }
        }
    }

    var configurations = rGroupQuery.GetAllConfigurations();
    Assert.AreEqual(12, configurations.Count());

    //Test restH values
    int countRestHForSmallestConfigurations = 0;
    foreach (var atc in configurations)
    {
        if (atc.Atoms.Count != 13)
        {
            continue;
        }
        // smallest configuration
        foreach (var atom in atc.Atoms)
        {
            if (atom.GetProperty<bool?>(CDKPropertyName.RestH) != null)
            {
                countRestHForSmallestConfigurations++;
                if (atom.Symbol.Equals("P"))
                {
                    Assert.AreEqual(true, atom.GetProperty<bool>(CDKPropertyName.RestH));
                }
            }
        }
    }
    Assert.AreEqual(6, countRestHForSmallestConfigurations);
}