示例#1
0
		/// <summary>
		/// Read input data from database: runs a query for the endpoint (activity) column
		/// restricted by the key criteria, converts each retrieved value to a numeric activity,
		/// then attaches the structure and a bitset fingerprint to each resulting row.
		/// </summary>
		/// <param name="smp">Parameters supplying the endpoint column, the key criteria and the similarity type</param>
		/// <returns>
		/// One entry per retrieved row that has a usable numeric activity value. Entries whose
		/// structure could not be retrieved are still returned, without structure or fingerprint.
		/// </returns>

		List<CompoundStructureActivityData> ReadData(
			SasMapParms smp)
		{
			AssertMx.IsNotNull(smp, "smp");

			MetaColumn activityMc = smp.EndpointMc;
			QueryColumn keyCriteriaQc = smp.KeyCriteriaQc;

			AssertMx.IsNotNull(activityMc, "mc");
			AssertMx.IsNotNull(keyCriteriaQc, "keyCriteriaQc");

			MetaTable mt = activityMc.MetaTable;

			if (mt.SummarizedExists && !mt.UseSummarizedData)
			{ // retrieve summarized data if exists
				MetaTable mt2 = MetaTableCollection.Get(mt.Name + MetaTable.SummarySuffix);
				if (mt2 != null)
				{
					MetaColumn mc2 = mt2.GetMetaColumnByName(activityMc.Name);
					if (mc2 == null) mc2 = mt2.GetMetaColumnByLabel(activityMc.Label);

					if (mc2 != null) // same column available in summarized?
					{
						mt = mt2;
						activityMc = mc2;
					}
				}
			}

			// The query table is built only after the table/column choice above is final.
			// Building it earlier meant the switch to the summarized table never took effect
			// and a column matched by label could be missing from the query table.

			Query q = new Query();
			QueryTable qt = new QueryTable(mt);

			// Use the parameter values validated above for the key criteria

			keyCriteriaQc.CopyCriteriaToQueryKeyCritera(q);
			q.KeyCriteriaDisplay = keyCriteriaQc.CriteriaDisplay;

			qt.SelectKeyOnly();
			QueryColumn qc = qt.GetQueryColumnByName(activityMc.Name);
			AssertMx.IsNotNull(qc, "qc"); // fail with a named assertion rather than a bare null dereference
			qc.Selected = true;
			q.AddQueryTable(qt);

			QueryEngine qe = new QueryEngine();
			qe.ExecuteQuery(q); // returned key list is ignored (may be empty if single-step query), keys are accumulated from the rows

			HashSet<string> keySet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

			List<CompoundStructureActivityData> data = new List<CompoundStructureActivityData>();

			while (true)
			{
				object[] vo = qe.NextRow();
				if (vo == null) break;

				string cid = (string)vo[0];
				keySet.Add(cid); // accumulate keys, including keys of rows that are skipped below

				object val = vo[2]; // position the activity value is read from for this query
				if (NullValue.IsNull(val)) continue;

				CompoundStructureActivityData cd = new CompoundStructureActivityData();
				cd.Cid = cid;

				if (val is double)
					cd.Activity = (double)val;

				else if (val is Int32)
					cd.Activity = (Int32)val;

				else if (val is NumberMx)
					cd.Activity = ((NumberMx)val).Value;

				else if (val is QualifiedNumber)
				{
					// The qualifier is deliberately not checked here since filtering on it
					// may remove good data (e.g. IC50 <0.0001)
					cd.Activity = ((QualifiedNumber)val).NumberValue;
				}

				else continue; // value type that can't be used as an activity

				if (cd.Activity == NullValue.NullNumber) continue;

				data.Add(cd);
			}

			// Retrieve structures

			List<string> keyList = new List<string>(keySet);
			Dictionary<string, MoleculeMx> csDict = MoleculeUtil.SelectMoleculesForCidList(keyList, qt.MetaTable); // get the structures in a single step

			// Fingerprint type depends only on the parameters so determine it once, outside of the loop

			FingerprintType fpType = FingerprintType.Circular;
			int fpSubtype = -1;

			if (smp.SimilarityType == SimilaritySearchType.ECFP4) // some issue with ECFP4?
				fpSubtype = CircularFingerprintType.ECFP4;

			else if (smp.SimilarityType == SimilaritySearchType.Normal)
				fpType = FingerprintType.MACCS;

			// Add structures and build/store fingerprints to data

			DebugLog.Message("========== Fingerprints ============");

			foreach (CompoundStructureActivityData cd in data)
			{
				MoleculeMx cs;
				if (!csDict.TryGetValue(cd.Cid, out cs) || cs == null) continue; // no structure for this compound

				cd.Structure = cs;

				cd.BitsetFingerprint = cs.BuildBitSetFingerprint(fpType, fpSubtype);
				if (cd.BitsetFingerprint == null) continue; // couldn't build fingerprint (e.g. no structure)

				if (Debug) DebugLog.Message(cd.Cid + ": " + Lex.Join(CdkMolUtil.GetBitSet(cd.BitsetFingerprint), ", "));
			}

			return data;
		}
示例#2
0
        /// <summary>
        /// NextRow - Return the next matching row value object.
        /// This override is currently not implemented: it throws unconditionally.
        /// The earlier implementation is retained below inside an "#if false" region,
        /// so it is excluded from compilation and is present for reference only.
        /// </summary>
        /// <returns>Never returns normally in the current form</returns>
        /// <exception cref="NotImplementedException">Always thrown</exception>

        public override Object[] NextRow()
        {
            throw new NotImplementedException();

            // Everything from here to the matching #endif is compiled out.
            // Outline of the disabled code: fetch the structures for the search key subset once,
            // then for each key / mapping collect the substituent fragments and fill in a
            // value-object array with one entry per column in SelectList.
#if false
            string   cid         = "";
            Molecule substituent = null;

            Stopwatch swTotal = Stopwatch.StartNew();
            Stopwatch sw      = Stopwatch.StartNew();

            if (Eqp.SearchKeySubset == null || Eqp.SearchKeySubset.Count == 0)
            {
                return(null);
            }

            // Get the structures for the set of keys

            if (CidToStructureDict == null)
            {
                CidToStructureDict = MoleculeUtil.SelectMoleculesForCidList(Eqp.SearchKeySubset);
                KeyListPos         = -1;
                MapPos             = -1;
                int msTime = (int)sw.ElapsedMilliseconds;
                if (RGroupDecomp.Debug)
                {
                    DebugLog.Message("Select " + Eqp.SearchKeySubset.Count + " structures time(ms): " + msTime);
                }
            }

            // Get the next match

            while (true)
            {
                if (KeyListPos < 0 || MapPos + 1 >= MapCount)
                {                 // go to next structure & set up mapping
                    KeyListPos++;
                    if (KeyListPos >= Eqp.SearchKeySubset.Count)
                    {
                        return(null);
                    }
                    cid = Eqp.SearchKeySubset[KeyListPos];
                    if (!CidToStructureDict.ContainsKey(cid))
                    {
                        continue;
                    }

                    MoleculeMx cs      = CidToStructureDict[cid];
                    string     molFile = cs.GetMolfileString();
                    //string chime = cs.ChimeString; // debug
                    if (RGroupDecomp.Debug)
                    {
                        DebugLog.StopwatchMessage("t1", sw);
                    }

                    if (DebugMx.False)                     // debug
                    {
                        //molFile = FileUtil.ReadFile(@"C:\Downloads\RGroupTarget.mol");
                        string targetSmiles = "CC1Cn2cc(c(= O)c3c2c(c(c(c3)F)Cl)S1)C(= O)O";
                        string targetChime  = "CYAAFQwADfwQ19aXPcZERR45lQkn08$hZNXzeJ2yaAhDnxxJou4Gq9od8VG1ykiO63fQpvM8W4C6MR$O3VaZjQwrGr5weW3y^BeUEezndoIivvAbQN58EEHVMAsdPaF4LIsqsf$OCBUPHI5njBB2LIBy3i2cwbrD8T8kFVBVWkTCfIUFUtblI0G7vYiEL^svUWCT^m6tF18I7ISJUp^7WkuzpT9LrBSJLmMl5hHXog$68Q6YPb0^Xp0ftxmy7FDSF^sWib6^JrUMhrHtLfJ3yVMTm9RIrvvKMRQvKqq4G1Ooze5pdlpSdzp7MFl0K1zx4tdnNFoUO1kRPpyZks61qbBz2tU0L$svvojoU4yUlf$^MFF0nqSfGVW2PKv9TReb$knLXffdAAhqBN310WUfdJDoQBNX1a5L2uj9ybNNRLYpaZN1p6WYp2WI^ntQEVTBaJF1Uu28N4o2xudURpVITTKO7omUtpgLoaoOQHazmZG3k^aHPUOQfE0d27eAbE^uxcQUAB";
                        molFile = CdkMol.Util.MoleculeTofMolfileString(CdkMol.Util.ChimeStringToMolecule(targetChime));
                    }

                    Molecule target = CdkMol.Util.MolfileStringToMolecule(molFile);
                    if (RGroupDecomp.Debug)
                    {
                        DebugLog.StopwatchMessage("t2", sw);
                    }

                    MapCount = RGroupDecomp.ProcessTargetMolecule(target);                     // process & get number of maps
                    if (MapCount > 1)
                    {
                        MapCount = MapCount;                                   // debug, seems to always be 1
                    }
                    MapPos = -1;

                    if (RGroupDecomp.Debug)
                    {
                        DebugLog.StopwatchMessage("t3", sw);
                    }
                }

                // Get the substituent for each RGroup for the next mapping

                MapPos++;                                        // get data for next mapping

                for (int si = 0; si < Substituents.Length; si++) // clear substituents
                {
                    Substituents[si] = null;
                }

                for (int fi = 0; fi < RgTotalCount; fi++)
                {
                    Molecule fragment = RGroupDecomp.GetIthMappingFragment(MapPos, fi);
                    if (fragment == null)
                    {
                        break;                                       // must have reached the end
                    }
                    if (RGroupDecomp.Debug)
                    {
                        DebugLog.StopwatchMessage("t4", sw);
                    }

                    int ri = CdkMol.Util.GetFragmentRGroupAssignment(fragment);

                    if (ri >= 0 && ri < Substituents.Length)
                    {
                        Substituents[ri] = fragment;
                    }
                }

                if (RGroupDecomp.Debug)
                {
                    DebugLog.StopwatchMessage("t5", sw);
                }

                // Fill in the Vo: one slot per selected column, chosen by column name

                object[] vo = new object[Qt.SelectedCount];
                for (int ci = 0; ci < SelectList.Count; ci++)
                {
                    MetaColumn mc   = SelectList[ci];
                    string     name = mc.Name.ToLower();

                    if (mc.IsKey)
                    {
                        vo[ci] = Eqp.SearchKeySubset[KeyListPos];
                    }

                    else if (Lex.Eq(name, "Core"))
                    {
                        vo[ci] = CoreChemicalStructure;
                    }

                    else if (Lex.Eq(name, "Terminate_Option"))
                    {
                        vo[ci] = TerminateOptionString;
                    }

                    else if (Lex.Eq(name, "Map_Number"))
                    {
                        vo[ci] = (MapPos + 1).ToString();
                    }

                    else if (Lex.Eq(name, "Map"))
                    {
                        if (RGroupDecomp.Debug)
                        {
                            DebugLog.StopwatchMessage("t6", sw);
                        }

                        MoleculeMx cs = null;

                        try                         // highlight core molecule
                        {
                            if (StrMatcher == null) // initialize matcher with core structure if not done yet
                            {
                                StrMatcher = new StructureMatcher();
                                string molfile2 = CdkMol.Util.RemoveRGroupAttachmentPointAtoms(CoreChemicalStructure.GetMolfileString());
                                if (RGroupDecomp.Debug)
                                {
                                    DebugLog.StopwatchMessage("t7", sw);
                                }

                                MoleculeMx cs2 = new MoleculeMx(MoleculeFormat.Molfile, molfile2);
                                StrMatcher.SetSSSQueryMolecule(cs2);                                 // set core query used for highlighting
                                if (RGroupDecomp.Debug)
                                {
                                    DebugLog.StopwatchMessage("t8", sw);
                                }
                            }

                            Molecule alignedTarget = RGroupDecomp.GetAlignedTargetForMapping(MapPos);
                            if (RGroupDecomp.Debug)
                            {
                                DebugLog.StopwatchMessage("t9", sw);
                            }

                            string chime = CdkMol.Util.MoleculeToChimeString(alignedTarget);
                            if (RGroupDecomp.Debug)
                            {
                                DebugLog.StopwatchMessage("t10", sw);
                            }

                            cs = new MoleculeMx(MoleculeFormat.Chime, chime);
                            cs = StrMatcher.HighlightMatchingSubstructure(cs);
                        }
                        // NOTE(review): any exception is swallowed here without logging; the value stored
                        // below is then whatever cs held when the failure occurred (possibly null).
                        catch (Exception ex) { ex = ex; }

                        vo[ci] = cs;                         // store value

                        if (RGroupDecomp.Debug)
                        {
                            DebugLog.StopwatchMessage("t11", sw);
                        }
                    }

                    else if (Lex.Eq(name, "Is_Map_Complete"))
                    {
                        vo[ci] = "Yes";                         // always complete for PP
                        //else vo[ci] = "No";
                    }

                    else
                    {                     // must be a substituent
                        if (RGroupDecomp.Debug)
                        {
                            DebugLog.StopwatchMessage("t12", sw);
                        }

                        // The substituent number is parsed from the characters between the first
                        // character of the column name and the first underscore.
                        // NOTE(review): an underscore at position 0 gives Substring(1, -1), and the parsed
                        // number is not range-checked before indexing Substituents - confirm both
                        // if this code is ever re-enabled.
                        int ri = name.IndexOf("_");
                        if (ri < 0 || !int.TryParse(name.Substring(1, ri - 1), out ri))
                        {
                            continue;                              // in case of bogus name
                        }
                        substituent = Substituents[ri];

                        if (substituent == null)
                        {
                            vo[ci] = null;                             // no mapping found
                            continue;
                        }

                        if (name.EndsWith("_structure"))
                        {
                            string     chime = CdkMol.Util.MoleculeToChimeString(substituent);
                            MoleculeMx cs    = new MoleculeMx(MoleculeFormat.Chime, chime);
                            vo[ci] = cs;
                        }

                        else if (name.EndsWith("_smiles"))
                        {
                            string smiles = CdkMol.Util.MoleculeToSmilesString(substituent);
                            vo[ci] = smiles;
                        }

                        else if (name.EndsWith("_formula"))
                        {
                            string mf = CdkMol.Util.GetMolFormulaDotDisconnect(substituent);
                            vo[ci] = mf;
                        }

                        else if (name.EndsWith("_weight"))
                        {
                            vo[ci] = CdkMol.Util.GetMolWeight(substituent);
                        }

                        else if (name.EndsWith("_substno"))
                        {
                            vo[ci] = ri;
                        }

                        if (RGroupDecomp.Debug)
                        {
                            DebugLog.StopwatchMessage("t13", sw);
                        }
                    }
                }

                int msTime = (int)sw.ElapsedMilliseconds;
                if (RGroupDecomp.Debug)
                {
                    DebugLog.StopwatchMessage("Total Time for Cid: " + cid, swTotal);
                }

                return(vo);
            }
#endif
        }