/// <summary>
/// Read a stored SQL statement, its key column name and its owner id from
/// dev_mbs_owner.mbs_spotfire_sql for the supplied name and version.
/// </summary>
/// <param name="name">Value matched against the name column</param>
/// <param name="version">Value matched against the version column</param>
/// <param name="sqlStmt">Receives the sql column (read as a CLOB); null if no row matched</param>
/// <param name="keyColName">Receives the key_col_name column; null if no row matched</param>
/// <param name="owner">Receives the ownr_id column; null if no row matched</param>
/// <returns>True if a matching row exists</returns>
public static bool ReadSqlStatement(
	string name,
	int version,
	out string sqlStmt,
	out string keyColName,
	out string owner)
{
	sqlStmt = keyColName = owner = null;

	// The name is embedded in the statement as a SQL string literal, so any
	// single quote must be doubled to keep the literal (and the statement) intact.
	string quotedName = (name ?? "").Replace("'", "''");

	string sql = @"
		select sql, key_col_name, ownr_id
		from dev_mbs_owner.mbs_spotfire_sql
		where name = '" + quotedName + "' and " +
		"version = " + version;

	DbCommandMx cmd = new DbCommandMx();
	try
	{
		cmd.Prepare(sql);
		cmd.ExecuteReader();

		bool exists = cmd.Read();
		if (exists)
		{
			sqlStmt = cmd.GetClob(0);
			keyColName = cmd.GetString(1);
			owner = cmd.GetString(2);
		}

		cmd.CloseReader();
		return exists;
	}
	finally
	{
		// Release the command even when Prepare/Execute/Read throws
		cmd.Dispose();
	}
}
static ICdkMol CdkMolUtil => StaticCdkMol.I; // static molecule shortcut for utility methods

/// <summary>
/// UpdateCorpFingerprintDatabaseMx
/// ///////////////////////////////////////////////////////
/// Syntax: Update FingerprintDatabaseMx [Corp | ChEMBL] [MACCS | ECFP4] [Load | ByCorpIdList &lt;list&gt; | SinceLastCheckpoint | LoadMissing | &lt;SingleCorpId&gt;]
///
/// Corp Examples:
///  Update FingerprintDatabaseMx Corp MACCS Load
///  Update FingerprintDatabaseMx Corp MACCS LoadMissing
///  Update FingerprintDatabaseMx Corp MACCS SinceLastCheckpoint
///
///  Update FingerprintDatabaseMx Corp ECFP4 Load
///  Update FingerprintDatabaseMx Corp ECFP4 LoadMissing
///  Update FingerprintDatabaseMx Corp ECFP4 SinceLastCheckpoint
///
/// ChEMBL Examples (only Load and LoadMissing are accepted for ChEMBL):
///  Update FingerprintDatabaseMx Chembl MACCS Load
///  Update FingerprintDatabaseMx Chembl MACCS LoadMissing
///
///  Update FingerprintDatabaseMx Chembl ECFP4 Load
///  Update FingerprintDatabaseMx Chembl ECFP4 LoadMissing
/// ///////////////////////////////////////////////////////
/// </summary>
/// <param name="argString">Database name followed by optional fingerprint type (MACCS is the default) and the update mode</param>
/// <returns>
/// SyntaxMsg if the arguments are not recognized, a short "nothing to do" message if
/// there are no ids to process, otherwise the summary text that is also written to the log
/// </returns>
public static string Update(
	string argString)
{
	MoleculeMx mol;
	string chime, smiles, molFile = "";
	string msg = "", sql = "", cid = "", maxCorpIdSql, CorpIdList = "";

	int storeChunkCount = 0, CorpId, molId, lowId = 0, highId = 0, maxDestId = 0, maxSrcId = 0;
	int readCount = 0, storeCount = 0;

	ByCheckpoint = ByCidRange = ByCidList = LoadIfMissing = false;
	ReadChunkSize = DefaultReadChunkSize;
	WriteChunkSize = DefaultWriteChunkSize;

	Failures = new Dictionary<string, string>();
	NewUndefinedStructureCids = new List<string>();
	LastFailure = "";
	FailureCount = 0;

	// global try loop

	try
	{
		////////////////////////
		/// Parse Parameters ///
		////////////////////////

		// See which database

		argString = (argString ?? "").Trim(); // a null argument is treated like an empty one (syntax message)

		if (Lex.StartsWith(argString, "Corp"))
		{
			Database = "Corp";
			// Strip exactly the matched prefix; a fixed offset of 5 threw when the argument was just "Corp"
			argString = argString.Substring("Corp".Length).Trim();
		}

		else if (Lex.StartsWith(argString, "Chembl"))
		{
			Database = "ChEMBL";
			argString = argString.Substring("Chembl".Length).Trim();
		}

		else
		{
			return SyntaxMsg;
		}

		// See which fingerprint type

		FingerprintType = FingerprintType.MACCS; // default to MACCS if type not defined

		if (Lex.TryReplace(ref argString, "MACCS", ""))
		{
			FingerprintType = FingerprintType.MACCS;
			argString = argString.Trim();
		}

		else if (Lex.TryReplace(ref argString, "ECFP4", ""))
		{
			FingerprintType = FingerprintType.Circular; // (i.e. ECFP4)
			argString = argString.Trim();
		}

		FpDao = new FingerprintDao(Database, FingerprintType);

		List<FingerprintRec> fpRecList = new List<FingerprintRec>();

		string args = argString.Trim();
		string initialMsg = "Update FingerprintDatabase started: " + args;

		CidList = new List<string>(); // init empty list

		//////////////////////
		/// Corp Database ///
		//////////////////////

		if (CorpDatabase)
		{
			if (Lex.Eq(args, "Load"))
			{
				ByCidRange = true;

				ShowProgress("Getting range of CorpIds to insert...");

				maxCorpIdSql = SelectMaxCorpId; // get highest id in source db
				maxSrcId = SelectSingleValueDao.SelectInt(maxCorpIdSql);
				if (maxSrcId < 0)
				{
					maxSrcId = 0;
				}

				maxDestId = GetMaxDestMolId(); // highest id already stored in the destination
				if (maxDestId < 0)
				{
					maxDestId = 0;
				}
			}

			else if (Lex.Eq(args, "SinceLastCheckpoint"))
			{
				ByCheckpoint = true;

				ShowProgress("Getting list of CorpIds updated since last checkpoint...");

				CidList = GetNewAndModifiedCorpIdList(out CidUpdateDateDict);

				if (CidList.Count == 0)
				{
					return "There have been no updates since the last checkpoint";
				}

				initialMsg += ", CorpIds to add/update: " + CidList.Count;
			}

			else if (Lex.StartsWith(args, "ByCorpIdList"))
			{
				ByCidList = true;
				CorpIdList = args.Substring("ByCorpIdList".Length).Trim();
				if (Lex.IsUndefined(CorpIdList))
				{
					throw new Exception("Undefined CorpId list");
				}
			}

			else if (Lex.StartsWith(args, "LoadMissing"))
			{
				// NOTE(review): any text following "LoadMissing" was parsed into a local
				// that nothing read; it is ignored here as before.
				LoadIfMissing = true;

				ShowProgress("Getting list of missing CorpIds...");

				CidList = GetMissingCidList();

				if (CidList.Count == 0)
				{
					return "There are no missing CorpIds";
				}

				initialMsg += ", Missing CorpIds: " + CidList.Count;
			}

			else if (int.TryParse(args, out maxSrcId)) // single CorpId
			{
				ByCidRange = true;
				maxDestId = maxSrcId - 1; // say 1 less is the max we have
			}

			else
			{
				return SyntaxMsg;
			}
		}

		///////////////////////
		/// ChEMBL Database ///
		///////////////////////

		else if (ChemblDatabase)
		{
			if (Lex.Eq(args, "Load"))
			{
				ByCidRange = true;

				ShowProgress("Getting range of MolRegNos to insert...");

				sql = "select max(molregno) from chembl_owner.compound_struct_xxxxxx";
				maxSrcId = SelectSingleValueDao.SelectInt(sql);
				if (maxSrcId < 0)
				{
					maxSrcId = 0;
				}

				maxDestId = GetMaxDestMolId();
				if (maxDestId < 0)
				{
					maxDestId = 0;
				}
			}

			else if (Lex.StartsWith(args, "LoadMissing"))
			{
				LoadIfMissing = true;

				ShowProgress("Getting list of missing ChEMBL Ids...");

				CidList = GetMissingCidList();

				if (CidList.Count == 0)
				{
					return "There are no missing Ids";
				}

				initialMsg += ", Missing Chembl Ids: " + CidList.Count;
			}

			else
			{
				return SyntaxMsg;
			}
		}

		else
		{
			return SyntaxMsg;
		}

		CidListOriginalCount = CidList.Count;
		Log(initialMsg);

		/////////////////////////////
		// Loop over chunks of data
		/////////////////////////////

		for (int chunk = 1; ; chunk++)
		{
			//////////////////////
			/// Corp Database ///
			//////////////////////

			if (CorpDatabase)
			{
				if (ByCheckpoint) // chunks of the checkpoint id list
				{
					string cidList = GetNextListChunk();
					if (Lex.IsUndefined(cidList))
					{
						break;
					}

					sql = SelectByCorpIdCriteria;
					sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + cidList + ")");

					// Checkpoint processing needs the rows ordered by update date rather than by id
					string matchString = "order by m.corp_nbr";
					if (!Lex.Contains(sql, matchString))
					{
						throw new Exception(matchString + " not found");
					}
					sql = Lex.Replace(sql, matchString, "order by m.molecule_date");

					msg = "Processing " + CidListOriginalCount + " updates since " + CheckpointDateTime;
				}

				else if (ByCidRange) // by CorpId range
				{
					if (maxDestId >= maxSrcId)
					{
						break; // done
					}

					lowId = maxDestId + 1; // start of next chunk
					highId = lowId + ReadChunkSize;
					maxDestId = highId;

					if (highId >= maxSrcId)
					{
						highId = maxSrcId;
					}

					sql = SelectByCorpIdCriteria;
					sql = Lex.Replace(sql, "<CorpIdCriteria>", "between " + lowId + " and " + highId);

					msg = "Processing CorpId range: " + lowId + " to " + highId;
				}

				else if (ByCidList) // by single user-supplied CorpId list
				{
					if (chunk > 1)
					{
						break; // break 2nd time through
					}

					sql = SelectByCorpIdCriteria;
					sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + CorpIdList + ")");
					msg = "Processing CorpId list: " + CorpIdList;
				}

				else if (LoadIfMissing)
				{
					string cidList = GetNextListChunk();
					if (Lex.IsUndefined(cidList))
					{
						break; // all done
					}

					sql = SelectByCorpIdCriteria;
					sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + cidList + ")");

					msg = "Processing missing CorpId Chunk: " + Mobius.Data.CidList.FormatAbbreviatedCidListForDisplay(null, cidList) +
						", Total Ids: " + CidListOriginalCount;

					Log(msg);
				}

				else
				{
					return SyntaxMsg;
				}
			}

			///////////////////////
			/// ChEMBL Database ///
			///////////////////////

			else if (ChemblDatabase)
			{
				if (ByCidRange) // by CID range
				{
					if (maxDestId >= maxSrcId)
					{
						break; // done
					}

					lowId = maxDestId + 1; // start of next chunk
					highId = lowId + ReadChunkSize;
					maxDestId = highId;

					if (maxDestId >= maxSrcId)
					{
						maxDestId = maxSrcId;
					}

					sql = SelectChemblSql;
					sql = Lex.Replace(sql, "<molregnoCriteria>", "between " + lowId + " and " + highId);

					msg = "Processing ChEMBL MolRegNo range: " + lowId + " to " + highId;
				}

				else if (LoadIfMissing)
				{
					string cidList = GetNextListChunk();
					if (Lex.IsUndefined(cidList))
					{
						break; // all done
					}

					// NOTE(review): this ChEMBL branch uses the Corp select template and placeholder
					// (the range branch above uses SelectChemblSql / <molregnoCriteria>) - confirm this is intended.
					sql = SelectByCorpIdCriteria;
					sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + cidList + ")");

					msg = "Processing missing ChEMBL Id Chunk: " + Mobius.Data.CidList.FormatAbbreviatedCidListForDisplay(null, cidList) +
						", Total Ids: " + CidListOriginalCount;
				}

				else
				{
					return SyntaxMsg;
				}
			}

			else
			{
				return SyntaxMsg;
			}

			ShowProgress(msg);

			// Execute the SQL to get the rows for the chunk

			DbCommandMx rdr = DbCommandMx.PrepareAndExecuteReader(sql);

			try // make sure the reader is released even if a read or a store fails
			{
				DateTime lastShowProgressTime = DateTime.MinValue;

				///////////////////////////////////////////
				/// Loop over rows in the current chunk ///
				///////////////////////////////////////////

				while (true)
				{
					// Update progress display (at most about once per second)

					if (DateTime.Now.Subtract(lastShowProgressTime).TotalSeconds > 1)
					{
						int storeTotalCount = storeCount + storeChunkCount;

						string msg2 = msg + "\r\n" +
							"Reads: " + readCount + "\r\n" +
							"Undefined: " + NewUndefinedStructureCids.Count + "\r\n" +
							"Insert/Updates: " + storeTotalCount + "\r\n" +
							"Failures: " + FailureCount + "\r\n" +
							"Failure Types: " + Failures.Count + "\r\n" +
							"Last Failure: " + LastFailure;

						ShowProgress(msg2);
						lastShowProgressTime = DateTime.Now;
					}

					// Read and process next compound

					bool readOk = rdr.Read();

					if (readOk)
					{
						readCount++;

						try
						{
							mol = null;

							//////////////////////
							/// Corp Database ///
							//////////////////////

							if (CorpDatabase)
							{
								CorpId = rdr.GetInt(0); // corp_nbr

								molId = CorpId;
								cid = CorpId.ToString();
								cid = CompoundId.NormalizeForDatabase(cid);

								if (!rdr.IsNull(1)) // be sure chime field isn't null
								{
									chime = rdr.GetClob(1);
									if (Lex.IsDefined(chime))
									{
										molFile = MoleculeMx.ChimeStringToMolfileString(chime); // convert Chime to MolFile
										mol = new MoleculeMx(MoleculeFormat.Molfile, molFile);
									}
								}

								MoleculeDateTime = rdr.GetDateTimeByName("Molecule_Date"); // Date molecule was updated in the CorpDB cartridge DB
							}

							///////////////////////
							/// ChEMBL Database ///
							///////////////////////

							else // chembl
							{
								molId = rdr.GetInt(0); // molregno
								cid = rdr.GetString(1); // ChEMBL id
								smiles = rdr.GetString(2);
								if (Lex.IsDefined(smiles))
								{
									mol = new MoleculeMx(MoleculeFormat.Smiles, smiles);
								}
							}

							// No usable structure: remember the cid and go on to the next row

							if (MoleculeMx.IsUndefined(mol) || mol.AtomCount <= 1)
							{
								NewUndefinedStructureCids.Add(cid);
								continue;
							}

							bool includeOverallFingerprint = true;
							List<BitSetFingerprint> fps = CdkMol.BuildBitSetFingerprints(mol.MolfileString, includeOverallFingerprint, FingerprintType);

							foreach (BitSetFingerprint fp in fps)
							{
								FingerprintRec fpr = new FingerprintRec();
								fpr.molId = molId;
								fpr.SrcId = SrcDbId;
								fpr.Cid = cid;
								fpr.Cardinality = fp.cardinality();
								fpr.Fingerprint = fp.asBitSet().toLongArray();
								fpRecList.Add(fpr);
							}
						}

						catch (Exception ex)
						{
							// Group failing cids by message so the final summary stays compact

							if (!Failures.ContainsKey(ex.Message))
							{
								Failures.Add(ex.Message, cid);
							}
							else
							{
								Failures[ex.Message] += ", " + cid;
							}

							LastFailure = "Cid: " + cid + " - " + ex.Message;

							Log(LastFailure);

							FailureCount++;

							continue;
						}

						storeChunkCount++;
					}

					// Flush when the write buffer is full or when the final partial buffer remains

					bool commitTransaction = (storeChunkCount >= WriteChunkSize || (!readOk && storeChunkCount > 0));
					if (commitTransaction)
					{
						// if updating by CheckPoint date range then merge existing data with new/updated data

						if (ByCheckpoint)
						{
							if (readCount > 0 && (storeCount > 0 || FailureCount == 0)) // make sure not everything has failed
							{
								MergeRecordsIntoFiles(fpRecList);
							}
						}

						// Simple append of records to files

						else
						{
							FpDao.OpenWriters("bin", FileMode.Append); // open bin files for append

							foreach (FingerprintRec fpr in fpRecList) // write out buffered recs
							{
								FpDao.WriteFingerprintRec(fpr);
							}

							FpDao.CloseWriters();

							int cnt = fpRecList.Count;
							if (cnt > 0)
							{
								string cid1 = fpRecList[0].Cid;
								string cid2 = fpRecList[cnt - 1].Cid;
								Log("Records Appended: " + cnt + ", CIDS: " + cid1 + " - " + cid2);
							}
							else
							{
								Log("Records Appended: 0");
							}
						}

						fpRecList.Clear();

						storeCount += storeChunkCount;
						storeChunkCount = 0;
					}

					if (!readOk)
					{
						break;
					}
				} // end of read loop for rows in a chunk
			}

			finally
			{
				rdr.Dispose();
			}
		} // end for loop of chunks

		DeleteTempFiles();

		if (LoadIfMissing) // update list of cids with missing structures
		{
			ExistingUndefinedStructureCids.UnionWith(NewUndefinedStructureCids);
			FpDao.WriteUndefinedStructuresCids(ExistingUndefinedStructureCids);
		}

		msg = "*** Update Complete ***\r\n\r\n" + msg;
		ShowProgress(msg);
		System.Threading.Thread.Sleep(100);

		string logMsg = "UpdateFingerprintDb - CIDs stored: " + storeCount + ", Undefined structures: " + NewUndefinedStructureCids.Count + ", failures: " + FailureCount + "\r\n";

		foreach (string key in Failures.Keys)
		{
			logMsg += key + " - CIDs: " + Failures[key] + "\r\n";
		}

		Log(logMsg);

		return logMsg;
	} // end of main try loop

	catch (Exception ex)
	{
		// Log the full details, then surface the failure to the caller with the original attached
		Log(DebugLog.FormatExceptionMessage(ex));
		throw new Exception(ex.Message, ex);
	}
}
/// <summary>
/// Retrieve any existing SVG for the list of supplied molecules.
/// Only molecules whose primary format is Helm, whose primary value is defined and
/// whose Id parses as an integer (the CorpId) are looked up.
/// </summary>
/// <param name="molList">Molecules to look up; SvgString is set on each molecule for which stored SVG is found</param>
/// <returns>
/// Number of molecules that received an SVG string; 0 if the user may not retrieve
/// structures or there is nothing to look up
/// </returns>
public static int SelectMoleculeListSvg(
	List<MoleculeMx> molList)
{
	MoleculeMx mol;
	int corpId, molSvgsFetchedCount = 0;
	string corpIdString, molString;

	const string sql = @" SELECT corp_nbr, molstructure svgString FROM mbs_owner.corp_moltable_mx WHERE corp_nbr in (<list>) and molstructure is not null ";

	if (!Security.UserInfo.Privileges.CanRetrieveStructures) // structures allowed?
	{
		return 0;
	}

	if (molList == null) // nothing to look up
	{
		return 0;
	}

	// Set up a dict keyed by cid with mol values.
	// NOTE(review): keys are the raw mol Id while the lookup further down uses
	// CompoundId.Normalize(corpId) - confirm that Ids arrive already normalized.

	Dictionary<string, MoleculeMx> molDict = new Dictionary<string, MoleculeMx>();

	foreach (MoleculeMx mol0 in molList)
	{
		if (mol0 == null || mol0.PrimaryFormat != MoleculeFormat.Helm || Lex.IsUndefined(mol0.PrimaryValue))
		{
			continue;
		}

		if (int.TryParse(mol0.Id, out corpId))
		{
			molDict[mol0.Id] = mol0;
		}
	}

	if (molDict.Count == 0)
	{
		return 0;
	}

	DbCommandMx cmd = new DbCommandMx();

	try
	{
		cmd.PrepareListReader(sql, DbType.String);
		cmd.ExecuteListReader(new List<string>(molDict.Keys));

		while (cmd.Read())
		{
			corpId = cmd.GetInt(0);

			if (cmd.IsNull(1)) // molstructure
			{
				continue;
			}

			molString = cmd.GetClob(1);
			if (!SvgUtil.IsSvgString(molString))
			{
				continue; // skip if not SVG
			}

			// molString should be compressed format SVG at this point

			corpIdString = CompoundId.Normalize(corpId.ToString());

			if (Lex.IsDefined(molString) && molDict.TryGetValue(corpIdString, out mol))
			{
				mol.SvgString = molString;
				molSvgsFetchedCount++;
			}
		}

		cmd.CloseReader();
	}

	finally
	{
		// Release the command, as ReadSqlStatement does, including when the query fails
		cmd.Dispose();
	}

	return molSvgsFetchedCount;
}
/// <summary>
/// Sync the Mobius CorpMoltable replicate used to retrieve Smiles
/// Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing [criteria] | &lt;singleCorpId&gt;]
///
/// ByDateRange (the default when no argument is given) reads rows newer than the stored
/// checkpoint date and advances the checkpoint after each flushed insert buffer.
/// ByCorpIdRange and a single CorpId read by corp_nbr range in small chunks.
/// LoadMissing runs the missing-helm select; any text after "LoadMissing" replaces the
/// "1=1" placeholder condition in that select.
/// </summary>
/// <param name="args">Update mode as described above</param>
/// <returns>Summary of inserts/updates, or the syntax message if args is not recognized</returns>
public static string UpdateCorpDbMoltableMx(
	string args)
{
	DateTime moleculeDateTime = DateTime.MinValue;
	double mw;
	string msg = "", sql = "", maxCorpIdSql, mf = "", chime = "", smiles = "", checkPointDate = "", helm = "", sequence = "", svg = "";

	object[][] pva = null;
	int pvaCount = 0, CorpId, lowCorpId = 0, highCorpId = 0, srcMaxCorpId = 0;

	int SelectChunkSize = 20; // small chunks
	int InsertBufferSize = 10;

	//int SelectChunkSize = 100000; // big chunks
	//int InsertBufferSize = 1000;

	// Select data from corp_moltable by CorpId range

	const string SelectByCorpIdRange = @" SELECT m.corp_nbr, chime(m.ctab), m.molformula, m.molweight, null molsmiles, s.helm_txt, s.sequence_txt, m.molecule_date FROM corp_owner.corp_moltable m, corp_owner.corp_substance s where m.corp_nbr > 0 and s.corp_nbr = m.corp_nbr and (s.status_code is null or s.status_code = 'A') ORDER BY corp_nbr";

	// Select data from corp_moltable by date range comparing to corp_moltable_mx

	const string SelectByDateRange = @" select m.corp_nbr, chime(m.ctab), m.molformula, m.molweight, null molsmiles, s.helm_txt, s.sequence_txt, m.molecule_date, m2.molecule_date from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx m2 where m.molecule_date > to_date('1-jan-1900 000000','DD-MON-YYYY HH24MISS') and s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and m2.corp_nbr (+) = m.corp_nbr and m2.molecule_date (+) != m.molecule_date order by m.molecule_date";

	// Select for missing smiles strings, ex: Update CorpDbMoltableMx LoadMissing mx.molecule_date > '1-jan-2014'
	// (not referenced by the code below, which uses SelectMissingHelmFix)

	const string SelectMissingSmilesFix = @" select /* check for CorpIds in corp_moltable not in corp_moltable_mx */ corp_nbr, chime(ctab), molformula, molweight, null molsmiles, helm_txt, sequence_txt, molecule_date from ( select m.*, s.helm_txt, s.sequence_txt, mx.molsmiles molsmiles_mx from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx mx where s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and mx.corp_nbr (+) = m.corp_nbr and 1=1 /* condition to substitute */ ) m where molsmiles_mx is null /* extra condition */ order by corp_nbr";

	// Insert missing helm info

	const string SelectMissingHelmFix = @" select /* check for CorpIds in corp_moltable not in corp_moltable_mx */ corp_nbr, chime(ctab), molformula, molweight, null molsmiles, helm_txt, sequence_txt, molecule_date from ( select m.*, s.helm_txt, s.sequence_txt, mx.molsmiles molsmiles_mx from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx mx where s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and mx.corp_nbr (+) = m.corp_nbr and 1=1 /* condition to substitute */ ) m where length(helm_txt) > 0 /* extra condition */ order by corp_nbr";

	// Secondary "large" structure table (~5k mols)
	// (not referenced by the code below; NOTE(review): the inline view is never closed)

	const string SelectLargeMols = @" select corp_nbr, to_clob(molstructure), to_clob(molformula), molweight, molsmiles, null helm_txt, null sequence_txt, molecule_date from (select corp_srl_nbr corp_nbr, 'CompoundId=' || corp_srl_nbr molstructure, null ctab, mlclr_frml_txt molformula, mlclr_wgt molweight, null molsmiles, null molecule_date from rdm_owner.rdm_sbstnc where rdw_src_cd = 'LRG'";

	// Insert statement

	const string InsertSql = @" insert into mbs_owner.corp_moltable_mx ( corp_nbr, molstructure, molformula, molweight, molsmiles, molecule_date) values (:0, :1, :2, :3, :4, :5)";

	// Build select sql

	bool byDateRange = false, byCorpIdRange = false, missingFix = false, deleteExisting = true;
	string missingFixCriteria = "";

	if (Lex.IsUndefined(args) || Lex.Eq(args, "ByDateRange"))
	{
		byDateRange = true;
	}

	else if (Lex.Eq(args, "ByCorpIdRange"))
	{
		byCorpIdRange = true;

		Progress.Show("Getting range of CorpIds to insert...");
		maxCorpIdSql = "select max(corp_nbr) from corp_owner.corp_moltable"; // get highest CorpId in source db
		srcMaxCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql);
		if (srcMaxCorpId < 0)
		{
			srcMaxCorpId = 0;
		}

		maxCorpIdSql = "select max(corp_nbr) from mbs_owner.corp_moltable_mx"; // get highest CorpId in dest db
		highCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql);
		if (highCorpId < 0)
		{
			highCorpId = 0;
		}
	}

	else if (Lex.StartsWith(args, "LoadMissing"))
	{
		missingFix = true;
		if (args.Contains(" "))
		{
			// Strip the whole keyword; a fixed offset of 10 left its final "g" in front of the criteria
			missingFixCriteria = args.Substring("LoadMissing".Length).Trim();
		}
	}

	else if (int.TryParse(args, out srcMaxCorpId)) // single CorpId
	{
		byCorpIdRange = true;
		highCorpId = srcMaxCorpId - 1; // say 1 less is the max we have
	}

	else
	{
		return "Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]";
	}

	Log("UpdateCorpDbMoltableMx started: " + args);

	int readCount = 0, insCount = 0, insertCount = 0, updateCount = 0, undefinedStructures = 0, smilesSuccess = 0, smilesFails = 0, helmStructures = 0;
	List<string> CorpIdList = new List<string>();

	for (int chunk = 1; ; chunk++) // loop over chunks
	{
		if (byDateRange) // single chunk
		{
			if (chunk > 1)
			{
				break; // break 2nd time through
			}

			checkPointDate = UserObjectDao.GetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", "01-sep-2013 000000");

			sql = Lex.Replace(SelectByDateRange, "1-jan-1900 000000", checkPointDate);

			msg = "Reading where date >= " + checkPointDate;
		}

		else if (byCorpIdRange) // by CorpId range
		{
			if (highCorpId >= srcMaxCorpId)
			{
				break; // done
			}

			lowCorpId = highCorpId + 1; // start of next chunk
			highCorpId = lowCorpId + SelectChunkSize;

			if (highCorpId >= srcMaxCorpId)
			{
				highCorpId = srcMaxCorpId;
			}

			sql = Lex.Replace(SelectByCorpIdRange, "corp_nbr > 0", "corp_nbr between " + lowCorpId + " and " + highCorpId);

			msg = "Reading: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount;
		}

		else if (missingFix)
		{
			if (chunk > 1)
			{
				break; // break 2nd time through
			}

			sql = SelectMissingHelmFix;
			if (Lex.IsDefined(missingFixCriteria)) // substitute any criteria
			{
				sql = Lex.Replace(sql, "1=1", missingFixCriteria);
			}

			msg = "Fixing missing data";
		}

		Progress.Show(msg);

		DbCommandMx readCmd = new DbCommandMx();
		DbCommandMx insertCmd = null;

		try // release both commands even if a read, conversion or insert fails
		{
			readCmd.MxConn = DbConnectionMx.GetConnection("prd123");
			readCmd.PrepareUsingDefinedConnection(sql, null);
			DbDataReader rdr = readCmd.ExecuteReader();

			insertCmd = new DbCommandMx();

			OracleDbType[] pta = new OracleDbType[6];
			pta[0] = OracleDbType.Int32;  // corp_nbr
			pta[1] = OracleDbType.Clob;   // molstructure
			pta[2] = OracleDbType.Clob;   // molformula
			pta[3] = OracleDbType.Double; // molweight
			pta[4] = OracleDbType.Clob;   // smiles
			pta[5] = OracleDbType.Date;   // molecule_date

			insertCmd.Prepare(InsertSql, pta);
			insertCmd.BeginTransaction(); // be sure we have a transaction going

			pva = DbCommandMx.NewObjectArrayArray(6, InsertBufferSize); // alloc insert row array
			object[] vo = new object[6];

			while (true)
			{
				bool readOk = rdr.Read();

				if (readOk)
				{
					readCount++; // counted up front so progress messages include the current row

					rdr.GetValues(vo);

					// GetValues filled slot 5 with result column 5 (helm_txt), but slot 5 is bound to the
					// molecule_date insert parameter; clear it so a null date is stored as null.
					vo[5] = DBNull.Value;

					mf = smiles = ""; // don't carry the previous row's values into the debug log

					CorpId = readCmd.GetInt(0); // corp_nbr
					vo[0] = CorpId;

					CorpIdList.Add(CorpId.ToString());

					if (!readCmd.IsNull(1)) // molstructure
					{
						chime = readCmd.GetClob(1);
						chime = OracleMx.ClearStringIfExceedsMaxStringSize(chime);
						vo[1] = chime;
					}
					else
					{
						chime = "";
					}

					if (!readCmd.IsNull(2)) // molformula
					{
						mf = readCmd.GetClob(2);
						mf = OracleMx.ClearStringIfExceedsMaxStringSize(mf);
						vo[2] = mf;
					}

					if (!readCmd.IsNull(3)) // molweight
					{
						mw = readCmd.GetDouble(3);
						vo[3] = mw;
					}

					if (Lex.IsDefined(chime)) // molsmiles - calculate from chime string
					{
						MoleculeMx cs = new MoleculeMx(MoleculeFormat.Chime, chime);
						if (cs.AtomCount > 1) // need more than one atom
						{
							MoleculeMx cs2 = cs.ConvertTo(MoleculeFormat.Smiles);
							smiles = cs2.GetSmilesString();
							if (Lex.IsDefined(smiles))
							{
								smilesSuccess++;
							}
							else
							{
								Log("Smiles conversion failure for CorpId: " + CorpId);
								smilesFails++;
							}
							smiles = OracleMx.ClearStringIfExceedsMaxStringSize(smiles);

							vo[4] = smiles;
						}
						else
						{
							undefinedStructures++;
						}
					}
					else
					{
						undefinedStructures++;
					}

					if (!readCmd.IsNull(5)) // helm_txt
					{
						helm = readCmd.GetClob(5);
						if (Lex.IsDefined(helm))
						{
							svg = HelmControl.GetSvg(helm);
							vo[1] = SvgUtil.CompressSvgString(svg); // store compressed svg in molstructure column for now
							helmStructures++;
						}
					}

					if (!readCmd.IsNull(6)) // sequence_txt
					{
						sequence = readCmd.GetClob(6);
						if (Lex.IsDefined(sequence))
						{
							// nothing yet
						}
					}

					moleculeDateTime = DateTime.MinValue;
					if (!readCmd.IsNull(7)) // molecule_date
					{
						moleculeDateTime = readCmd.GetDateTime(7);
						vo[5] = moleculeDateTime;
					}

					for (int pi = 0; pi < 6; pi++) // invert for insert
					{
						pva[pi][pvaCount] = vo[pi];
					}

					if (Debug)
					{
						msg = String.Format("CorpId: {0}, mf: {1}, chime: {2}, smiles: {3}", CorpId.ToString(), mf.Length, chime.Length, smiles.Length);
						Log(msg);
					}

					pvaCount++;
				}

				if (pvaCount >= InsertBufferSize || (!readOk && pvaCount > 0)) // write if buffer full or at end
				{
					try
					{
						if (deleteExisting)
						{
							int delCount = DoDeletes(CorpIdList);
							updateCount += delCount; // count deletes as updates
							insertCount -= delCount; // subtract from inserts
						}
						CorpIdList.Clear();

						insCount = insertCmd.ExecuteArrayNonReader(pva, ref pvaCount);

						insertCmd.Commit();
						insertCmd.BeginTransaction();
						insertCount += insCount;
					}

					catch (Exception ex)
					{
						throw new Exception(ex.Message, ex);
					}

					if (byDateRange)
					{
						// Advance the checkpoint to the date of the last row read so far
						string checkPointDate2 = String.Format("{0:dd-MMM-yyyy HHmmss}", moleculeDateTime); // format date time that will work with oracle
						UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate2);
						msg = "Processing where date >= " + checkPointDate + ", Reads: " + readCount + ", Inserts: " + insertCount + ", Updates: " + updateCount;
					}

					else if (byCorpIdRange) // CorpId range
					{
						msg = "Processing: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount;
					}

					else if (missingFix)
					{
						msg = "Fixing missing smiles, Updates: " + updateCount;
					}

					msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures);

					Progress.Show(msg);
				}

				if (!readOk)
				{
					break;
				}
			}
		}

		finally
		{
			readCmd.Dispose();
			if (insertCmd != null)
			{
				insertCmd.Dispose();
			}
		}
	} // end for select chunk

	msg = "UpdateCorpDbMoltableMx - Inserts: " + insertCount + ", Updates: " + updateCount;
	msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures);
	Log(msg);

	return msg;
}