/// <summary>
/// Scan every fingerprint record for the current Database / FingerprintType
/// and return the largest molecule id found.
/// </summary>
/// <returns>
/// The largest id seen; 0 when the data files do not exist or no record
/// carries an id greater than zero.
/// </returns>
public static int GetMaxDestMolId()
{
    int maxMolId = 0;

    FingerprintDao fpd = new FingerprintDao(Database, FingerprintType);
    if (!fpd.DataFilesExist())
    {
        return maxMolId;
    }

    fpd.OpenReaders();
    try
    {
        while (true)
        {
            FingerprintRec rec = fpd.ReadFingerprintRec();
            if (rec == null) // a null record ends the scan
            {
                break;
            }

            int molId;
            if (CorpDatabase)
            {
                // For the corp database the id is parsed from the Cid string.
                // TryParse leaves molId at 0 when Cid is not a valid integer,
                // so such a record can never raise the maximum.
                int.TryParse(rec.Cid, out molId);
            }
            else
            {
                molId = rec.molId;
            }

            if (molId > maxMolId)
            {
                maxMolId = molId;
            }
        }
    }
    finally
    {
        // Release the readers even when reading a record throws
        fpd.CloseReaders();
    }

    return maxMolId;
}
/// <summary>
/// Run a fingerprint similarity search for the query molecule against each
/// database in FingerprintDbMx.Databases and return the combined hit list.
/// Sets QueryMol, QueryFpCardinality, QueryFpLongArray, MatchList, ThreadException,
/// FpDao, FileStreamReaders and FileMatchLists as a side effect.
/// </summary>
/// <param name="queryMol">Query structure; its fingerprint is built with
/// CdkMol.BuildBitSetFingerprintForLargestFragment</param>
/// <returns>MatchList sorted by decreasing MatchScore and, when MaxHits is
/// greater than zero, truncated to at most MaxHits entries</returns>
/// <exception cref="Exception">Thrown after all databases have been processed
/// when ThreadException is non-null; the original is attached as InnerException</exception>
public List<StructSearchMatch> ExecuteSearch(
    IAtomContainer queryMol)
{
    AssertMx.IsTrue(
        FingerprintType == FingerprintType.MACCS || FingerprintType == FingerprintType.Circular,
        "Invalid FingerprintType: " + FingerprintType);

    QueryMol = queryMol;

    // Generate the query fingerprint
    BitSetFingerprint fp =
        CdkMol.BuildBitSetFingerprintForLargestFragment(queryMol, FingerprintType);

    QueryFpCardinality = fp.cardinality();
    QueryFpLongArray = fp.asBitSet().toLongArray();

    MatchList = new List<StructSearchMatch>();
    ThreadException = null;

    foreach (string databaseName in FingerprintDbMx.Databases) // loop on all databases
    {
        // Skip databases the caller has not asked for
        if (Lex.Contains(databaseName, "corp"))
        {
            if (!GetCorpSim)
            {
                continue;
            }
        }
        else if (Lex.Contains(databaseName, "chembl"))
        {
            if (!GetChemblSim)
            {
                continue;
            }
        }

        if (Debug)
        {
            DebugLog.Message("Starting sim search on " + databaseName + " database");
        }

        FpDao = new FingerprintDao(databaseName, FingerprintType);
        if (!FpDao.DataFilesExist())
        {
            continue; // no files for this database
        }

        double et;
        FileStreamReaders = FpDao.OpenReaders();
        try
        {
            // One match list per fingerprint file
            FileMatchLists = new List<StructSearchMatch>[FileStreamReaders.Length];
            for (int i1 = 0; i1 < FileMatchLists.Length; i1++)
            {
                FileMatchLists[i1] = new List<StructSearchMatch>();
            }

            DateTime t0 = DateTime.Now;

            if (UseMultipleThreads)
            {
                ExecuteMultiThreadSearch();
            }
            else
            {
                ExecuteSingleThreadSearch();
            }

            et = TimeOfDay.Delta(ref t0);
        }
        finally
        {
            // Close the readers even if the search throws
            FpDao.CloseReaders();
        }

        List<StructSearchMatch> matchList = MergeIndividualFileMatchLists();

        if (KeysToExclude != null || SearchKeySubset != null) // filter by any allowed/disallowed keys
        {
            List<StructSearchMatch> filtered = new List<StructSearchMatch>();

            foreach (StructSearchMatch m0 in matchList)
            {
                if (KeysToExclude != null && KeysToExclude.Contains(m0.SrcCid))
                {
                    continue;
                }

                if (SearchKeySubset != null && !SearchKeySubset.Contains(m0.SrcCid))
                {
                    continue;
                }

                filtered.Add(m0);
            }

            matchList = filtered;
        }

        matchList.Sort(StructSearchMatch.CompareByMatchQuality);
        MatchList.AddRange(matchList);

        if (Debug)
        {
            // NOTE(review): the summary reports FileMatchLists[0], as the original code did,
            // rather than the merged/filtered matchList - confirm that is what is wanted.
            List<StructSearchMatch> reported =
                FileMatchLists.Length > 0 ? FileMatchLists[0] : matchList;

            // databaseName is passed as an argument so braces in the name cannot break the format string
            string msg = string.Format(
                "Search complete ({0}).Time : {1:0.00} {2} Hits: ",
                databaseName, et, reported.Count);

            // Append the first few hits BEFORE logging so they actually reach the log
            for (int hi = 0; hi < 5 && hi < reported.Count; hi++)
            {
                StructSearchMatch sm = reported[hi];
                msg += sm.SrcCid + string.Format(" = {0:0.00}, ", sm.MatchScore);
            }

            DebugLog.Message(msg);
        }
    } // database loop

    if (ThreadException != null)
    {
        throw new Exception(ThreadException.Message, ThreadException);
    }

    MatchList.Sort( // sort by decreasing sim value
        delegate(StructSearchMatch p1, StructSearchMatch p2)
        {
            return p2.MatchScore.CompareTo(p1.MatchScore);
        });

    if (MaxHits > 0 && MatchList.Count > MaxHits) // remove hits beyond maximum if defined
    {
        MatchList.RemoveRange(MaxHits, MatchList.Count - MaxHits);
    }

    return MatchList;
}
/// <summary>
/// Merge existing .bin files with new records
///  1. Copy .bin files to .mrg files filtering out cids that have been updated
///  2. Append new records to .mrg files
///  3. Rename .bin files to .bak files and .mrg files to new .bin files
/// When ByCheckpoint is set the update checkpoint date is written afterwards,
/// and a summary line is logged at the end.
/// </summary>
/// <param name="fpRecList">New/updated fingerprint records; any existing record
/// with the same Cid is dropped from the merged files</param>
/// <exception cref="ArgumentNullException">fpRecList is null</exception>
/// <exception cref="Exception">A .bin file cannot be renamed or replaced</exception>
static void MergeRecordsIntoFiles(
    List<FingerprintRec> fpRecList)
{
    if (fpRecList == null)
    {
        throw new ArgumentNullException("fpRecList");
    }

    Progress.Show("Merging existing and new files...");

    // Build a hash of cids that shouldn't be copied from the existing files
    HashSet<string> fpRecHash = new HashSet<string>();
    foreach (FingerprintRec fpr in fpRecList)
    {
        fpRecHash.Add(fpr.Cid);
    }

    int replacementCnt = 0;

    // Copy existing bin file entries filtering out cids that were updated

    FpDao.OpenReaders("bin"); // open existing .bin files for input
    bool readersOpen = true;
    try
    {
        FpDao.OpenWriters("mrg", FileMode.Create); // open new merge files for output
        try
        {
            for (int fi = 0; fi < FingerprintDao.FingerprintFileCount; fi++) // copy each file
            {
                BinaryWriter bw = FpDao.BinaryWriters[fi];

                while (true)
                {
                    FingerprintRec fpr = FpDao.ReadFingerprintRec(fi);
                    if (fpr == null)
                    {
                        break;
                    }

                    if (fpRecHash.Contains(fpr.Cid))
                    {
                        replacementCnt++;
                        continue; // skip if this cid was updated in the incoming list
                    }

                    FpDao.WriteFingerprintRec(bw, fpr);
                }
            }

            // Flag first so a failing close is not retried in the finally block
            readersOpen = false;
            FpDao.CloseReaders();

            // Append the new records to the merge files
            foreach (FingerprintRec fpr in fpRecList) // write out buffered recs
            {
                FpDao.WriteFingerprintRec(fpr);
            }
        }
        finally
        {
            // Always release the writers, even when the copy or append fails
            FpDao.CloseWriters();
        }
    }
    finally
    {
        if (readersOpen)
        {
            FpDao.CloseReaders();
        }
    }

    // Backup old files and activate new files

    for (int fi = 0; fi < FingerprintDao.FingerprintFileCount; fi++) // check that we can backup all bin files
    {
        string fileName = FpDao.GetFpFileName(fi) + ".bin";
        if (!FileUtil.CanRename(fileName))
        {
            throw new Exception("Unable to rename file: " + fileName + ", aborting update");
        }
    }

    for (int fi = 0; fi < FingerprintDao.FingerprintFileCount; fi++)
    {
        string fileName = FpDao.GetFpFileName(fi);
        bool backupOk = FileUtil.BackupAndReplaceFile(fileName + ".bin", fileName + ".bak", fileName + ".mrg");
        if (!backupOk)
        {
            throw new Exception("Error replacing file: " + fileName + ".bin");
        }
    }

    if (ByCheckpoint)
    {
        // Format last new/updated date time in a format that works with Oracle
        string checkPointDate = String.Format("{0:dd-MMM-yyyy HHmmss}", MoleculeDateTime);
        FpDao.WriteFingerPrintSimMxDataUpdateCheckpointDate(checkPointDate); // update checkpoint
    }

    Progress.Show("Merging complete...");

    // Summarize the merge. An empty input list is reported rather than indexed,
    // which previously threw after the files had already been replaced.
    int cnt = fpRecList.Count;
    string cid1 = "None", cid2 = "None";
    string date1 = "Missing", date2 = "Missing";

    if (cnt > 0)
    {
        cid1 = fpRecList[0].Cid;
        cid2 = fpRecList[cnt - 1].Cid;

        if (CidUpdateDateDict.ContainsKey(cid1))
        {
            date1 = CidUpdateDateDict[cid1].ToString();
        }

        if (CidUpdateDateDict.ContainsKey(cid2))
        {
            date2 = CidUpdateDateDict[cid2].ToString(); // was looked up with cid1 by mistake
        }
    }

    Log("Records stored: " + cnt + ", Adds: " + (cnt - replacementCnt) + ", Replacements: " + replacementCnt +
        ", Date range: " + date1 + " - " + date2 + ", CIDs: First = " + cid1 + ", Last = " + cid2);
}