/// <summary>
/// Read the input data for a map: run a query that retrieves the endpoint (activity)
/// column for the compounds matching the key criteria, convert each value to a number,
/// then attach the structure and a bitset fingerprint to each retained compound.
/// </summary>
/// <param name="smp">
/// Map parameters. <c>EndpointMc</c> and <c>KeyCriteriaQc</c> must be non-null (asserted).
/// <c>SimilarityType</c> selects the fingerprint: ECFP4 gives a circular ECFP4 fingerprint,
/// Normal gives MACCS, and any other value gives a circular fingerprint with subtype -1.
/// </param>
/// <returns>
/// One entry per retrieved row that carries a usable numeric activity. Entries whose
/// structure could not be retrieved are still returned, with Structure and
/// BitsetFingerprint left unset.
/// </returns>
List<CompoundStructureActivityData> ReadData(
    SasMapParms smp)
{
    MetaColumn activityMc = smp.EndpointMc;
    QueryColumn keyCriteriaQc = smp.KeyCriteriaQc;

    AssertMx.IsNotNull(activityMc, "mc");
    AssertMx.IsNotNull(keyCriteriaQc, "keyCriteriaQc");

    // Table that owns the endpoint column as supplied by the caller; the structure
    // lookup below keeps using this table, as it did before the query-table fix.
    MetaTable endpointMt = activityMc.MetaTable;
    MetaTable mt = endpointMt;

    if (mt.SummarizedExists && !mt.UseSummarizedData)
    { // retrieve summarized data if exists
        MetaColumn mc2 = null;
        MetaTable mt2 = MetaTableCollection.Get(mt.Name + MetaTable.SummarySuffix);
        if (mt2 != null)
        {
            mc2 = mt2.GetMetaColumnByName(activityMc.Name);
            if (mc2 == null) mc2 = mt2.GetMetaColumnByLabel(activityMc.Label);
        }

        if (mc2 != null) // same column available in summarized?
        {
            mt = mt2;
            activityMc = mc2;
        }
    }

    // Build the query table only once the table choice is final. Creating it before the
    // summarized switch (as was done previously) left the query on the unsummarized table
    // and made the column lookup below fail when the summarized column name differed.
    Query q = new Query();
    QueryTable qt = new QueryTable(mt);

    // Use the criteria from the parameter that was validated above rather than a
    // separately held copy of the parameters.
    keyCriteriaQc.CopyCriteriaToQueryKeyCritera(q);
    q.KeyCriteriaDisplay = keyCriteriaQc.CriteriaDisplay;

    qt.SelectKeyOnly();
    QueryColumn qc = qt.GetQueryColumnByName(activityMc.Name);
    AssertMx.IsNotNull(qc, "qc");
    qc.Selected = true;

    q.AddQueryTable(qt);

    QueryEngine qe = new QueryEngine();
    qe.ExecuteQuery(q); // returned key list is not used (may be empty for a single-step query); keys are accumulated from the rows instead

    HashSet<string> keySet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    List<CompoundStructureActivityData> data = new List<CompoundStructureActivityData>();

    while (true)
    {
        object[] vo = qe.NextRow();
        if (vo == null) break;

        CompoundStructureActivityData cd = new CompoundStructureActivityData();
        string cid = (string)vo[0];
        cd.Cid = cid;
        keySet.Add(cid); // accumulate keys, including rows that are skipped below

        // NOTE(review): the activity value is read from position 2 of the row; presumably
        // the earlier positions hold key values - confirm against the QueryEngine row layout.
        object val = vo[2];
        if (NullValue.IsNull(val)) continue;

        if (val is double) cd.Activity = (double)val;
        else if (val is Int32) cd.Activity = (Int32)val;
        else if (val is NumberMx) cd.Activity = ((NumberMx)val).Value;
        else if (val is QualifiedNumber)
        {
            // Values with a qualifier other than "=" are deliberately kept, since dropping
            // them may filter out good data (e.g. IC50 <0.0001).
            cd.Activity = ((QualifiedNumber)val).NumberValue;
        }
        else continue; // unsupported value type

        if (cd.Activity == NullValue.NullNumber) continue;

        data.Add(cd);
    }

    // Retrieve structures

    List<string> keyList = new List<string>(keySet);
    Dictionary<string, MoleculeMx> csDict = MoleculeUtil.SelectMoleculesForCidList(keyList, endpointMt); // get the structures in a single step

    // The fingerprint type depends only on the parameters, so resolve it once

    FingerprintType fpType = FingerprintType.Circular;
    int fpSubtype = -1;

    if (smp.SimilarityType == SimilaritySearchType.ECFP4) // some issue with ECFP4?
    {
        fpType = FingerprintType.Circular;
        fpSubtype = CircularFingerprintType.ECFP4;
    }
    else if (smp.SimilarityType == SimilaritySearchType.Normal)
    {
        fpType = FingerprintType.MACCS;
    }

    // Add structures and build/store fingerprints to data

    DebugLog.Message("========== Fingerprints ============");

    foreach (CompoundStructureActivityData cd in data)
    {
        MoleculeMx cs;
        if (!csDict.TryGetValue(cd.Cid, out cs) || cs == null) continue; // no structure for this compound

        cd.Structure = cs;

        cd.BitsetFingerprint = cs.BuildBitSetFingerprint(fpType, fpSubtype);
        if (cd.BitsetFingerprint == null) continue; // couldn't build fingerprint (e.g. no structure)

        if (Debug) DebugLog.Message(cd.Cid + ": " + Lex.Join(CdkMolUtil.GetBitSet(cd.BitsetFingerprint), ", "));
    }

    return data;
}
/// <summary>
/// NextRow - Return the next matching row value object.
/// In the current build this override is not implemented and always throws; the
/// previous R-group decomposition implementation is retained below, excluded from
/// compilation by <c>#if false</c>, as a reference.
/// </summary>
/// <returns>
/// Never returns normally in the current build. The disabled implementation returned
/// the value-object array for the next mapping, or null once the key subset was
/// empty or exhausted.
/// </returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override Object[] NextRow()
{
    throw new NotImplementedException();

#if false
    // ---- Disabled legacy implementation (not compiled) ----

    string cid = "";
    Molecule substituent = null;

    Stopwatch swTotal = Stopwatch.StartNew();
    Stopwatch sw = Stopwatch.StartNew();

    if (Eqp.SearchKeySubset == null || Eqp.SearchKeySubset.Count == 0)
    {
        return(null);
    }

    // Get the structures for the set of keys (done once, on the first call)

    if (CidToStructureDict == null)
    {
        CidToStructureDict = MoleculeUtil.SelectMoleculesForCidList(Eqp.SearchKeySubset);
        KeyListPos = -1;
        MapPos = -1;
        int msTime = (int)sw.ElapsedMilliseconds;
        if (RGroupDecomp.Debug)
        {
            DebugLog.Message("Select " + Eqp.SearchKeySubset.Count + " structures time(ms): " + msTime);
        }
    }

    // Get the next match

    while (true)
    {
        if (KeyListPos < 0 || MapPos + 1 >= MapCount)
        { // go to next structure & set up mapping
            KeyListPos++;
            if (KeyListPos >= Eqp.SearchKeySubset.Count)
            {
                return(null);
            }

            cid = Eqp.SearchKeySubset[KeyListPos];
            if (!CidToStructureDict.ContainsKey(cid))
            {
                continue; // no structure retrieved for this key, try the next key
            }

            MoleculeMx cs = CidToStructureDict[cid];
            string molFile = cs.GetMolfileString();
            //string chime = cs.ChimeString; // debug
            if (RGroupDecomp.Debug)
            {
                DebugLog.StopwatchMessage("t1", sw);
            }

            if (DebugMx.False) // debug
            {
                //molFile = FileUtil.ReadFile(@"C:\Downloads\RGroupTarget.mol");
                string targetSmiles = "CC1Cn2cc(c(= O)c3c2c(c(c(c3)F)Cl)S1)C(= O)O";
                string targetChime = "CYAAFQwADfwQ19aXPcZERR45lQkn08$hZNXzeJ2yaAhDnxxJou4Gq9od8VG1ykiO63fQpvM8W4C6MR$O3VaZjQwrGr5weW3y^BeUEezndoIivvAbQN58EEHVMAsdPaF4LIsqsf$OCBUPHI5njBB2LIBy3i2cwbrD8T8kFVBVWkTCfIUFUtblI0G7vYiEL^svUWCT^m6tF18I7ISJUp^7WkuzpT9LrBSJLmMl5hHXog$68Q6YPb0^Xp0ftxmy7FDSF^sWib6^JrUMhrHtLfJ3yVMTm9RIrvvKMRQvKqq4G1Ooze5pdlpSdzp7MFl0K1zx4tdnNFoUO1kRPpyZks61qbBz2tU0L$svvojoU4yUlf$^MFF0nqSfGVW2PKv9TReb$knLXffdAAhqBN310WUfdJDoQBNX1a5L2uj9ybNNRLYpaZN1p6WYp2WI^ntQEVTBaJF1Uu28N4o2xudURpVITTKO7omUtpgLoaoOQHazmZG3k^aHPUOQfE0d27eAbE^uxcQUAB";
                molFile = CdkMol.Util.MoleculeTofMolfileString(CdkMol.Util.ChimeStringToMolecule(targetChime));
            }

            Molecule target = CdkMol.Util.MolfileStringToMolecule(molFile);
            if (RGroupDecomp.Debug)
            {
                DebugLog.StopwatchMessage("t2", sw);
            }

            MapCount = RGroupDecomp.ProcessTargetMolecule(target); // process & get number of maps
            if (MapCount > 1)
            {
                MapCount = MapCount; // debug, seems to always be 1
            }

            MapPos = -1;
            if (RGroupDecomp.Debug)
            {
                DebugLog.StopwatchMessage("t3", sw);
            }
        }

        // Get the substituent for each RGroup for the next mapping

        MapPos++; // get data for next mapping

        for (int si = 0; si < Substituents.Length; si++) // clear substituents
        {
            Substituents[si] = null;
        }

        for (int fi = 0; fi < RgTotalCount; fi++)
        {
            Molecule fragment = RGroupDecomp.GetIthMappingFragment(MapPos, fi);
            if (fragment == null)
            {
                break; // must have reached the end
            }

            if (RGroupDecomp.Debug)
            {
                DebugLog.StopwatchMessage("t4", sw);
            }

            int ri = CdkMol.Util.GetFragmentRGroupAssignment(fragment);
            if (ri >= 0 && ri < Substituents.Length) // ignore assignments outside the substituent array
            {
                Substituents[ri] = fragment;
            }
        }

        if (RGroupDecomp.Debug)
        {
            DebugLog.StopwatchMessage("t5", sw);
        }

        // Fill in the Vo

        object[] vo = new object[Qt.SelectedCount];
        for (int ci = 0; ci < SelectList.Count; ci++)
        {
            MetaColumn mc = SelectList[ci];
            string name = mc.Name.ToLower(); // lower-cased so the EndsWith suffix checks below match

            if (mc.IsKey)
            {
                vo[ci] = Eqp.SearchKeySubset[KeyListPos];
            }

            else if (Lex.Eq(name, "Core"))
            {
                vo[ci] = CoreChemicalStructure;
            }

            else if (Lex.Eq(name, "Terminate_Option"))
            {
                vo[ci] = TerminateOptionString;
            }

            else if (Lex.Eq(name, "Map_Number"))
            {
                vo[ci] = (MapPos + 1).ToString(); // MapPos is zero-based, the reported map number starts at 1
            }

            else if (Lex.Eq(name, "Map"))
            {
                if (RGroupDecomp.Debug)
                {
                    DebugLog.StopwatchMessage("t6", sw);
                }

                MoleculeMx cs = null;
                try // hilight core molecule
                {
                    if (StrMatcher == null) // initialize matcher with core structure if not done yet
                    {
                        StrMatcher = new StructureMatcher();
                        string molfile2 = CdkMol.Util.RemoveRGroupAttachmentPointAtoms(CoreChemicalStructure.GetMolfileString());
                        if (RGroupDecomp.Debug)
                        {
                            DebugLog.StopwatchMessage("t7", sw);
                        }

                        MoleculeMx cs2 = new MoleculeMx(MoleculeFormat.Molfile, molfile2);
                        StrMatcher.SetSSSQueryMolecule(cs2); // set core query used for highlighting
                        if (RGroupDecomp.Debug)
                        {
                            DebugLog.StopwatchMessage("t8", sw);
                        }
                    }

                    Molecule alignedTarget = RGroupDecomp.GetAlignedTargetForMapping(MapPos);
                    if (RGroupDecomp.Debug)
                    {
                        DebugLog.StopwatchMessage("t9", sw);
                    }

                    string chime = CdkMol.Util.MoleculeToChimeString(alignedTarget);
                    if (RGroupDecomp.Debug)
                    {
                        DebugLog.StopwatchMessage("t10", sw);
                    }

                    cs = new MoleculeMx(MoleculeFormat.Chime, chime);
                    cs = StrMatcher.HighlightMatchingSubstructure(cs);
                }
                // NOTE(review): any exception is swallowed here; vo[ci] then receives whatever
                // cs held at the point of failure (null, or the structure without highlighting).
                catch (Exception ex)
                {
                    ex = ex;
                }

                vo[ci] = cs; // store value
                if (RGroupDecomp.Debug)
                {
                    DebugLog.StopwatchMessage("t11", sw);
                }
            }

            else if (Lex.Eq(name, "Is_Map_Complete"))
            {
                vo[ci] = "Yes"; // always complete for PP
                //else vo[ci] = "No";
            }

            else
            { // must be a substituent
                if (RGroupDecomp.Debug)
                {
                    DebugLog.StopwatchMessage("t12", sw);
                }

                // The substituent index is parsed from the characters between the first
                // character of the name and its first underscore.
                // NOTE(review): presumably names look like "r1_structure" - confirm; the parsed
                // index is not range-checked before it is used to index Substituents.
                int ri = name.IndexOf("_");
                if (ri < 0 || !int.TryParse(name.Substring(1, ri - 1), out ri))
                {
                    continue; // in case of bogus name
                }

                substituent = Substituents[ri];
                if (substituent == null)
                {
                    vo[ci] = null; // no mapping found
                    continue;
                }

                if (name.EndsWith("_structure"))
                {
                    string chime = CdkMol.Util.MoleculeToChimeString(substituent);
                    MoleculeMx cs = new MoleculeMx(MoleculeFormat.Chime, chime);
                    vo[ci] = cs;
                }

                else if (name.EndsWith("_smiles"))
                {
                    string smiles = CdkMol.Util.MoleculeToSmilesString(substituent);
                    vo[ci] = smiles;
                }

                else if (name.EndsWith("_formula"))
                {
                    string mf = CdkMol.Util.GetMolFormulaDotDisconnect(substituent);
                    vo[ci] = mf;
                }

                else if (name.EndsWith("_weight"))
                {
                    vo[ci] = CdkMol.Util.GetMolWeight(substituent);
                }

                else if (name.EndsWith("_substno"))
                {
                    vo[ci] = ri;
                }

                if (RGroupDecomp.Debug)
                {
                    DebugLog.StopwatchMessage("t13", sw);
                }
            }
        }

        int msTime = (int)sw.ElapsedMilliseconds;
        if (RGroupDecomp.Debug)
        {
            DebugLog.StopwatchMessage("Total Time for Cid: " + cid, swTotal);
        }

        return(vo);
    }
#endif
}