Пример #1
0
        /// <summary>
        /// Scan every fingerprint record of the current database and return the largest molecule id found.
        /// </summary>
        /// <remarks>
        /// When CorpDatabase is set the id is parsed from the record's Cid (a Cid that is not a
        /// valid integer counts as 0); otherwise the record's molId field is used.
        /// </remarks>
        /// <returns>
        /// The largest molecule id seen, or 0 if the data files do not exist or no id greater than 0 is found.
        /// </returns>
        public static int GetMaxDestMolId()
        {
            int            maxMolId = 0;
            FingerprintDao fpd      = new FingerprintDao(Database, FingerprintType);

            if (!fpd.DataFilesExist())
            {
                return maxMolId;
            }

            fpd.OpenReaders();

            try             // make sure the readers are released even if a read fails
            {
                while (true)
                {
                    FingerprintRec rec = fpd.ReadFingerprintRec();
                    if (rec == null)                     // null marks the end of the data
                    {
                        break;
                    }

                    int molId;

                    if (CorpDatabase)
                    {
                        if (!int.TryParse(rec.Cid, out molId))
                        {
                            molId = 0;                             // non-numeric Cid can never be the maximum
                        }
                    }
                    else
                    {
                        molId = rec.molId;
                    }

                    if (molId > maxMolId)
                    {
                        maxMolId = molId;
                    }
                }
            }
            finally
            {
                fpd.CloseReaders();
            }

            return maxMolId;
        }
Пример #2
0
        /// <summary>
        /// Run a fingerprint similarity search for the query molecule against each fingerprint database
        /// listed in FingerprintDbMx.Databases.
        /// </summary>
        /// <remarks>
        /// Databases whose name contains "corp" are skipped unless GetCorpSim is set and databases whose
        /// name contains "chembl" are skipped unless GetChemblSim is set. Databases without data files
        /// are skipped as well. Matches are optionally filtered by KeysToExclude / SearchKeySubset.
        /// </remarks>
        /// <param name="queryMol">Query molecule from which the search fingerprint is built</param>
        /// <returns>
        /// The combined matches (also stored in MatchList), sorted by decreasing MatchScore and truncated
        /// to MaxHits entries when MaxHits is greater than zero.
        /// </returns>
        /// <exception cref="Exception">Wraps the exception stored in ThreadException, if any, once all databases have been processed</exception>

        public List <StructSearchMatch> ExecuteSearch(
            IAtomContainer queryMol)
        {
            AssertMx.IsTrue(FingerprintType == FingerprintType.MACCS || FingerprintType == FingerprintType.Circular,
                            "Invalid FingerprintType: " + FingerprintType);

            QueryMol = queryMol;

            // Generate the query fingerprint and cache the pieces used by the search routines

            BitSetFingerprint fp = CdkMol.BuildBitSetFingerprintForLargestFragment(queryMol, FingerprintType);

            QueryFpCardinality = fp.cardinality();
            QueryFpLongArray   = fp.asBitSet().toLongArray();

            MatchList       = new List <StructSearchMatch>();
            ThreadException = null;

            foreach (string databaseName in FingerprintDbMx.Databases)             // loop on all databases
            {
                if (Lex.Contains(databaseName, "corp"))
                {
                    if (!GetCorpSim)
                    {
                        continue;
                    }
                }

                else if (Lex.Contains(databaseName, "chembl"))
                {
                    if (!GetChemblSim)
                    {
                        continue;
                    }
                }

                if (Debug)
                {
                    DebugLog.Message("Starting sim search on " + databaseName + " database");
                }

                FpDao = new FingerprintDao(databaseName, FingerprintType);

                if (!FpDao.DataFilesExist())
                {
                    continue;                                          // no files for this database
                }

                double et;                 // elapsed search time as returned by TimeOfDay.Delta

                FileStreamReaders = FpDao.OpenReaders();

                try                 // always release the readers, even if the search itself fails
                {
                    FileMatchLists = new List <StructSearchMatch> [FileStreamReaders.Length];
                    for (int i1 = 0; i1 < FileMatchLists.Length; i1++)
                    {
                        FileMatchLists[i1] = new List <StructSearchMatch>();
                    }

                    DateTime t0 = DateTime.Now;

                    if (UseMultipleThreads)
                    {
                        ExecuteMultiThreadSearch();
                    }
                    else
                    {
                        ExecuteSingleThreadSearch();
                    }

                    et = TimeOfDay.Delta(ref t0);
                }
                finally
                {
                    FpDao.CloseReaders();
                }

                List <StructSearchMatch> matchList = MergeIndividualFileMatchLists();

                if (KeysToExclude != null || SearchKeySubset != null)                 // filter by any allowed/disallowed keys
                {
                    // FindAll builds a new list, leaving the merged list itself untouched
                    matchList = matchList.FindAll(
                        delegate(StructSearchMatch m0)
                        {
                            if (KeysToExclude != null && KeysToExclude.Contains(m0.SrcCid))
                            {
                                return false;
                            }

                            if (SearchKeySubset != null && !SearchKeySubset.Contains(m0.SrcCid))
                            {
                                return false;
                            }

                            return true;
                        });
                }

                matchList.Sort(StructSearchMatch.CompareByMatchQuality);

                MatchList.AddRange(matchList);

                if (Debug)
                {
                    // NOTE(review): the hit count and sample hits come from the first file's match list only,
                    // as in the original code - confirm whether the merged list was intended.
                    List <StructSearchMatch> firstFileHits =
                        FileMatchLists.Length > 0 ? FileMatchLists[0] : new List <StructSearchMatch>();

                    // The database name is passed as a format argument so braces in a name cannot break the format string
                    string msg =
                        string.Format("Search complete ({0}).Time : {1:0.00} ", databaseName, et) +
                        string.Format("{0} Hits: ", firstFileHits.Count);

                    // Append up to five sample hits before the message is logged
                    for (int hi = 0; hi < 5 && hi < firstFileHits.Count; hi++)
                    {
                        StructSearchMatch sm = firstFileHits[hi];
                        msg += sm.SrcCid + string.Format(" = {0:0.00}, ", sm.MatchScore);
                    }

                    DebugLog.Message(msg);
                }
            }             // database loop

            if (ThreadException != null)
            {
                throw new Exception(ThreadException.Message, ThreadException);
            }

            MatchList.Sort(             // sort by decreasing sim value
                delegate(StructSearchMatch p1, StructSearchMatch p2)
                {
                    return p2.MatchScore.CompareTo(p1.MatchScore);
                });

            if (MaxHits > 0 && MatchList.Count > MaxHits)             // remove hits beyond maximum if defined
            {
                MatchList.RemoveRange(MaxHits, MatchList.Count - MaxHits);
            }

            return MatchList;
        }
Пример #3
0
        /// <summary>
        /// Merge existing .bin files with new records
        /// 1. Copy .bin files to .mrg files filtering out cids that have been updated
        /// 2. Append new records to .mrg files
        /// 3. Rename .bin files to .bak files and .mrg files to new .bin files
        /// When ByCheckpoint is set the update checkpoint date is written afterwards, and a
        /// summary of the merge is logged at the end.
        /// </summary>
        /// <param name="fpRecList">New or updated fingerprint records to store</param>
        /// <exception cref="Exception">A .bin file cannot be renamed, or replacing a file failed</exception>

        static void MergeRecordsIntoFiles(
            List <FingerprintRec> fpRecList)
        {
            Progress.Show("Merging existing and new files...");

            HashSet <string> fpRecHash = new HashSet <string>();             // cids that shouldn't be copied from the old files

            foreach (FingerprintRec fpr in fpRecList)
            {
                fpRecHash.Add(fpr.Cid);
            }

            int  replacementCnt = 0;
            bool readersOpen    = false;
            bool writersOpen    = false;

            try
            {
                // Copy existing bin file entries filtering out cids that were updated

                FpDao.OpenReaders("bin");                                // open existing .bin files for input
                readersOpen = true;
                FpDao.OpenWriters("mrg", FileMode.Create);               // open new merge files for output
                writersOpen = true;

                for (int fi = 0; fi < FingerprintDao.FingerprintFileCount; fi++)                 // copy each file
                {
                    BinaryWriter bw = FpDao.BinaryWriters[fi];

                    while (true)
                    {
                        FingerprintRec fpr = FpDao.ReadFingerprintRec(fi);
                        if (fpr == null)
                        {
                            break;
                        }

                        if (fpRecHash.Contains(fpr.Cid))
                        {
                            replacementCnt++;
                            continue;                             // skip if this cid was updated in the incoming list
                        }

                        FpDao.WriteFingerprintRec(bw, fpr);
                    }
                }

                readersOpen = false;                 // cleared first so a failing close is not retried in the finally block
                FpDao.CloseReaders();

                // Append the new records to the merge files

                foreach (FingerprintRec fpr in fpRecList)                 // write out buffered recs
                {
                    FpDao.WriteFingerprintRec(fpr);
                }

                writersOpen = false;
                FpDao.CloseWriters();
            }
            finally
            {
                // Only reached with a flag still set when an exception interrupted the merge
                if (readersOpen)
                {
                    FpDao.CloseReaders();
                }

                if (writersOpen)
                {
                    FpDao.CloseWriters();
                }
            }

            // Backup old files and activate new files

            for (int fi = 0; fi < FingerprintDao.FingerprintFileCount; fi++)             // check that we can backup all bin files
            {
                string fileName = FpDao.GetFpFileName(fi) + ".bin";
                if (!FileUtil.CanRename(fileName))
                {
                    throw new Exception("Unable to rename file: " + fileName + ", aborting update");
                }
            }

            for (int fi = 0; fi < FingerprintDao.FingerprintFileCount; fi++)
            {
                string fileName = FpDao.GetFpFileName(fi);
                bool   replaced = FileUtil.BackupAndReplaceFile(fileName + ".bin", fileName + ".bak", fileName + ".mrg");
                if (!replaced)
                {
                    throw new Exception("Error replacing file: " + fileName + ".bin");
                }
            }

            if (ByCheckpoint)
            {
                string checkPointDate = String.Format("{0:dd-MMM-yyyy HHmmss}", MoleculeDateTime);   // format last new/updated date time in a format that works with Oracle
                FpDao.WriteFingerPrintSimMxDataUpdateCheckpointDate(checkPointDate);                 // update checkpoint
            }

            Progress.Show("Merging complete...");

            int cnt = fpRecList.Count;

            if (cnt == 0)             // nothing was added, so there is no first/last cid to report
            {
                Log("Records stored: 0, Adds: 0, Replacements: 0");
                return;
            }

            string cid1 = fpRecList[0].Cid;
            string cid2 = fpRecList[cnt - 1].Cid;

            string date1 = CidUpdateDateDict.ContainsKey(cid1) ? CidUpdateDateDict[cid1].ToString() : "Missing";
            string date2 = CidUpdateDateDict.ContainsKey(cid2) ? CidUpdateDateDict[cid2].ToString() : "Missing";             // date of the last cid (was looked up with cid1)

            Log("Records stored: " + cnt + ", Adds: " + (cnt - replacementCnt) + ", Replacements: " + replacementCnt +
                ", Date range: " + date1 + " - " + date2 + ", CIDs: First = " + cid1 + ", Last = " + cid2);
        }