Exemple #1
0
		/// <summary>
		/// Calculate Tanimoto similarity between two compounds
		/// </summary>
		/// <param name="fp1"></param>
		/// <param name="rp2"></param>
		/// <returns></returns>

		double CalculateSimilarity(
			CompoundStructureActivityData cd1,
			CompoundStructureActivityData cd2)
		{
			int l1 = -1, l2 = -1;

			if (cd1.BitsetFingerprint == null || cd2.BitsetFingerprint == null)
				return 0;

			if (Debug)
			{
				if ((cd1.Cid == "111" && cd2.Cid == "222") ||
				 (cd2.Cid == "222" && cd1.Cid == "111"))
				{
					DebugLog.Message("Before: " + cd1.Cid + ": " + String.Join(", ", CdkMolUtil.GetBitSet(cd1.BitsetFingerprint)));
					DebugLog.Message("Before: " + cd2.Cid + ": " + String.Join(", ", CdkMolUtil.GetBitSet(cd2.BitsetFingerprint)));
				}
			}

			double sim = MoleculeMx.CalculateBitSetFingerprintSimilarity(cd1.BitsetFingerprint, cd2.BitsetFingerprint);

			if (Debug)
			{
				if ((cd1.Cid == "111" && cd2.Cid == "222") ||
				 (cd2.Cid == "222" && cd1.Cid == "111"))
				{
					DebugLog.Message(cd1.Cid + ": " + String.Join(", ", CdkMolUtil.GetBitSet(cd1.BitsetFingerprint)));
					DebugLog.Message(cd2.Cid + ": " + String.Join(", ", CdkMolUtil.GetBitSet(cd2.BitsetFingerprint)));
					DebugLog.Message(cd1.Cid + ", " + cd2.Cid + " Similarity: " + sim);
				}
			}

			return sim;
		}
Exemple #2
0
		/// <summary>
		/// Read input data from database
		/// </summary>
		/// <param name="smp">
		/// <returns></returns>

		List<CompoundStructureActivityData> ReadData(
			SasMapParms smp)
		{
			MetaColumn activityMc = smp.EndpointMc;
			QueryColumn keyCriteriaQc = smp.KeyCriteriaQc;

			AssertMx.IsNotNull(activityMc, "mc");
			AssertMx.IsNotNull(keyCriteriaQc, "keyCriteriaQc");

			MetaTable mt, mt2;
			MetaColumn mc2 = null;

			Query q = new Query();
			mt = activityMc.MetaTable;
			QueryTable qt = new QueryTable(mt);
			if (mt.SummarizedExists && !mt.UseSummarizedData)
			{ // retrieve summarized data if exists 
				mt2 = MetaTableCollection.Get(mt.Name + MetaTable.SummarySuffix);
				if (mt2 != null)
				{
					mc2 = mt2.GetMetaColumnByName(activityMc.Name);
					if (mc2 == null) mc2 = mt2.GetMetaColumnByLabel(activityMc.Label);
				}

				if (mc2 != null) // same column available in summarized?
				{
					mt = mt2;
					activityMc = mc2;
				}
			}

			SMP.KeyCriteriaQc.CopyCriteriaToQueryKeyCritera(q);
			q.KeyCriteriaDisplay = SMP.KeyCriteriaQc.CriteriaDisplay;

			qt.SelectKeyOnly();
			QueryColumn qc = qt.GetQueryColumnByName(activityMc.Name);
			qc.Selected = true;
			q.AddQueryTable(qt);

			QueryEngine qe = new QueryEngine();
			List<string> keyList = qe.ExecuteQuery(q); // note that keylist may be empty if single-step query

			HashSet<string> keySet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

			List<CompoundStructureActivityData> data = new List<CompoundStructureActivityData>();

			int rowCount = 0;
			while (true)
			{
				object[] vo = qe.NextRow();
				if (vo == null) break;
				CompoundStructureActivityData cd = new CompoundStructureActivityData();
				string cid = (string)vo[0];
				cd.Cid = cid;
				keySet.Add(cid); // accumulate keys

				object val = vo[2];
				if (NullValue.IsNull(val)) continue;
				if (val is double)
					cd.Activity = (double)val;
				else if (val is Int32)
					cd.Activity = (Int32)val;

				else if (val is NumberMx)
				{
					NumberMx nex = val as NumberMx;
					cd.Activity = nex.Value;
				}

				else if (val is QualifiedNumber)
				{
					QualifiedNumber qn = val as QualifiedNumber;
					cd.Activity = qn.NumberValue;
					//if (qn.Qualifier != null && qn.Qualifier != "" && qn.Qualifier != "=")
					//	continue; // (don't want to do this since may filter out good data (e.g. IC50 <0.0001))
				}

				else continue;

				if (cd.Activity == NullValue.NullNumber) continue;

				data.Add(cd);
				rowCount++;
			}

			// Retrieve structures

			keyList = new List<string>(keySet);
			Dictionary<string, MoleculeMx> csDict = MoleculeUtil.SelectMoleculesForCidList(keyList, qt.MetaTable); // get the structures in a single step

			// Add structures and build/store fingerprints to data

			DebugLog.Message("========== Fingerprints ============");

			foreach (CompoundStructureActivityData cd in data)
			{

				if (!csDict.ContainsKey(cd.Cid) || csDict[cd.Cid] == null) continue;

				if (cd.Cid == "111" || cd.Cid == "222") csDict = csDict; // debug

				MoleculeMx cs = csDict[cd.Cid];
				cd.Structure = cs;

				FingerprintType fpType = FingerprintType.Circular;
				int fpSubtype = -1;

				if (SMP.SimilarityType == SimilaritySearchType.ECFP4) // some issue with ECFP4?
				{
					fpType = FingerprintType.Circular;
					fpSubtype = CircularFingerprintType.ECFP4;
				}

				else if (SMP.SimilarityType == SimilaritySearchType.Normal)
				{
					fpType = FingerprintType.MACCS;
				}

				cd.BitsetFingerprint = cs.BuildBitSetFingerprint(fpType, fpSubtype);
				if (cd.BitsetFingerprint == null) continue; // couldn't build fingerprint (e.g. no structure)

				if (Debug) DebugLog.Message(cd.Cid + ": " + Lex.Join(CdkMolUtil.GetBitSet(cd.BitsetFingerprint), ", "));
			}

			return data;
		}