/// <summary> /// Modal edit of molecule /// </summary> public static MoleculeMx Edit( MoleculeMx mol, MoleculeRendererType editorType = MoleculeRendererType.Unknown, string title = "") { MoleculeMx mol2 = null; try { if (editorType == MoleculeRendererType.Unknown) // need to assign editor type? { if (mol.IsChemStructureFormat) { editorType = MoleculeRendererType.Chemistry; } else if (mol.IsBiopolymerFormat) { editorType = MoleculeRendererType.Helm; } else { editorType = MoleculeRendererType.Chemistry; // final default } } bool useChemEditor = (editorType != MoleculeRendererType.Helm); if (useChemEditor) { mol2 = mol.ConvertTo(MoleculeFormat.Molfile); return(EditCdkMolMolecule(mol2, title)); } else // use Helm editor { return(EditHelmMolecule(mol, title)); } } catch (Exception ex) { XtraMessageBox.Show(ex.Message, "Error editing molecule"); return(null); } }
/// <summary> /// EditHelmMolecule /// </summary> /// <param name="title"></param> /// <param name="mol"></param> /// <returns></returns> public static MoleculeMx EditHelmMolecule( MoleculeMx mol, string title = "") { MoleculeMx mol2 = mol.ConvertTo(MoleculeFormat.Helm); string helm = mol2.HelmString; HelmEditorDialog editor = new HelmEditorDialog(); string newHelm = editor.Edit(helm); if (newHelm == null) { return(null); } mol2.SetPrimaryTypeAndValue(MoleculeFormat.Helm, newHelm); return(mol2); }
private void DelayedEditMolecule(object arg) { try { int ci = (int)arg; string txt = Criteria.Text.Substring(ci + 16); // pick up structure name int i1 = txt.IndexOf("]"); if (i1 < 0) { return; } string sid = txt.Substring(0, i1).Trim().ToUpper(); string chime = ""; if (LabeledCriteria.Structures.ContainsKey(sid)) { chime = LabeledCriteria.Structures[sid]; } MoleculeMx mol = new MoleculeMx(MoleculeFormat.Chime, chime); MoleculeMx mol2 = MoleculeEditor.Edit(mol, MoleculeRendererType.Unknown, "Edit Structure"); if (mol2 == null) { return; } if (mol2.IsChemStructureFormat) // store chime for chem structure { mol2.ConvertTo(MoleculeFormat.Chime); } LabeledCriteria.Structures[sid] = mol2.GetPrimaryTypeAndValueString(); return; } catch (Exception ex) { return; } }
/// <summary> /// Search button clicked, process input /// </summary> /// <returns></returns> DialogResult ProcessInput() { CidList cidList = null; StreamReader structureFileReader = null; string qid; // query identifier, compoundId, file name or sdFile key value QueryManager qm; DataTableMx dt; DataColumn dc; DataRowMx dr; //object[] dr; // if using Qe DialogResult dlgRslt = DialogResult.OK; Query q = null; QueryTable qt = null; QueryColumn simScoreQc, structQc; // query column containing latest query settings MetaTable mt = null; MetaColumn keyMc = null, structMc = null, dbSetMc = null, simScoreMc = null, mc; MetaColumnType storageType; string txt, tok; if (DatabasesToSearch.Text == "") { MessageBoxMx.ShowError("Databases to search must be defined."); DatabasesToSearch.Focus(); return(DialogResult.Cancel); } // Get list of databases string[] dba = DatabasesToSearch.Text.Split(','); List <MetaTable> tables = new List <MetaTable>(); foreach (string dbLabel0 in dba) { string dbLabel = dbLabel0.Trim(); RootTable dbInfo = RootTable.GetFromTableLabel(dbLabel); if (dbInfo == null) { MessageBoxMx.ShowError("Can't find database " + DatabasesToSearch.Text); DatabasesToSearch.Focus(); return(DialogResult.Cancel); } mt = MetaTableCollection.Get(dbInfo.MetaTableName); if (mt == null) { MessageBoxMx.ShowError("Unable to locate parent structure table for database: " + DatabasesToSearch.Text); DatabasesToSearch.Focus(); return(DialogResult.Cancel); } if (dbSetMc == null) { dbSetMc = mt.DatabaseListMetaColumn; } tables.Add(mt); } if (dbSetMc == null) { throw new Exception("\"Databases\" metacolumn not found for any of the databases to search"); } // Validate other form values RetrieveStructures = RetrieveMatchingStructures.Checked; bool fromList = FromList.Checked; int listCidsRead = 0; int inputQueryCount = -1; if (fromList) // using list, validate list name { if (SavedListUo == null) { MessageBoxMx.ShowError("Compound list must be defined."); ListName.Focus(); return(DialogResult.Cancel); } cidList = CidListCommand.Read(SavedListUo); if (cidList == null) { MessageBoxMx.ShowError("Error reading list."); ListName.Focus(); return(DialogResult.Cancel); } inputQueryCount = cidList.Count; } else // Using SdFile, validate SdFile name { StructureFile = FileName.Text; if (StructureFile == "") { MessageBoxMx.ShowError("File must be defined."); FileName.Focus(); return(DialogResult.Cancel); } try { structureFileReader = new StreamReader(StructureFile); structureFileReader.Close(); } catch (Exception ex) { MessageBoxMx.ShowError("Can't read file: " + Lex.Dq(StructureFile)); FileName.Focus(); return(DialogResult.Cancel); } keyField = KeyField.Text; // get key, blank to use name in 1st line inputQueryCount = -1; // don't know how many queries unless we read the file (todo?) } tok = ResultsName.Text.Trim(); // name to store results under if (tok == "") { MessageBoxMx.ShowError("A name for the results must be provided."); ResultsName.Focus(); return(DialogResult.Cancel); } if (SubStruct.Checked) { Psc.SearchType = StructureSearchType.Substructure; } else if (Full.Checked) { Psc.SearchType = StructureSearchType.FullStructure; } else if (Similarity.Checked) { Psc.SearchType = StructureSearchType.MolSim; } else { throw new Exception("Unrecognized search type"); } // Write initial log entries SearchCount++; string logFileName = ClientDirs.DefaultMobiusUserDocumentsFolder + @"\Multistructure Search " + SearchCount + ".txt"; if (!UIMisc.CanWriteFileToDefaultDir(logFileName)) { return(DialogResult.Cancel); } LogStream = new StreamWriter(logFileName); if (ResultsUo == null) { ResultsUo = new UserObject(UserObjectType.Annotation); } ResultsUo.Name = tok; UserObjectTree.GetValidUserObjectTypeFolder(ResultsUo); DateTime startTime = DateTime.Now; WriteToLog("Multiple " + Psc.SearchType + " Search"); WriteToLog("Databases: " + DatabasesToSearch.Text); WriteToLog("Date: " + startTime); if (fromList) { WriteToLog("Input List: " + SavedListUo.Name); } else { WriteToLog("Input Structure File: " + StructureFile); } WriteToLog("Output List: " + ResultsUo.Name); WriteToLog("Log File: " + logFileName); WriteToLog(""); WriteToLog("Query, Match, Score"); int queryCount = 0; int matchAtLeastOneCount = 0; MoleculeMx queryStructure = null; // current structure being searched CidList matchList = new CidList(); List <MatchData> matchData = new List <MatchData>(); if (FromFile.Checked) // open SdFile as required { structureFileReader = new StreamReader(StructureFile); } // Search of structures one at a time while (true) { if (fromList) // get next structure from list { if (listCidsRead >= cidList.Count) { break; } qid = cidList[listCidsRead].Cid; listCidsRead++; if (qid.Trim() == "") { continue; } if (qid.ToLower().IndexOf(".mol") > 0 || qid.ToLower().IndexOf(".skc") > 0) { // file reference if (!File.Exists(qid)) { continue; } if (qid.ToLower().IndexOf(".mol") > 0) { queryStructure = MoleculeMx.ReadMolfile(qid); } else { queryStructure = MoleculeMx.ReadSketchFile(qid); } } else { queryStructure = MoleculeUtil.SelectMoleculeForCid(qid); } if (queryStructure == null || queryStructure.AtomCount == 0) { continue; // oops } } else // get next structure from input file { qid = null; if (StructureFile.ToLower().EndsWith(".sdf")) { List <SdFileField> fList = SdFileDao.Read(structureFileReader); if (fList == null) // end of sdFile { structureFileReader.Close(); break; } if (fList.Count == 0) { continue; } queryStructure = new MoleculeMx(MoleculeFormat.Molfile, fList[0].Data); if (queryStructure == null || queryStructure.AtomCount == 0) { continue; } if (keyField != "") // key field specified? { qid = SdFileDao.GetDataField(fList, keyField); } else // get name from 1st line of molfile { string molFile = fList[0].Data; int i1 = molFile.IndexOf("\n"); if (i1 == 0) { qid = ""; } else { qid = molFile.Substring(0, i1).Trim(); } } if (string.IsNullOrEmpty(qid)) { qid = SdFileDao.GetDataField(fList, "compound_id"); } } else // assume smiles file { string smiles = structureFileReader.ReadLine(); if (smiles == null) // end of sdFile { structureFileReader.Close(); break; } smiles = smiles.Trim(); if (smiles.Length == 0) { continue; } int i1 = smiles.IndexOf(","); // get any preceeding queryId if (i1 < 0) { i1 = smiles.IndexOf("\t"); } if (i1 >= 0) { qid = smiles.Substring(0, i1).Trim(); smiles = smiles.Substring(i1 + 1).Trim(); } queryStructure = new MoleculeMx(MoleculeFormat.Smiles, smiles); if (queryStructure == null || queryStructure.AtomCount == 0) { continue; } } if (qid == null || qid.Trim() == "") { qid = (queryCount + 1).ToString(); // be sure we have a query id } } queryCount++; // count the query if (queryStructure == null || queryStructure.AtomCount == 0) { WriteToLog("Error converting specific structure " + queryCount.ToString() + ", " + qid); continue; } queryStructure.RemoveStructureCaption(); // remove any Mobius-added caption Psc.Molecule = queryStructure; string msg = "Searching Structure: " + queryCount.ToString(); if (inputQueryCount > 0) { msg += " of " + inputQueryCount.ToString(); } msg += "\n" + "Structures with one or more matches: " + matchAtLeastOneCount.ToString() + "\n" + "Total Matches: " + matchList.Count.ToString(); Progress.Show(msg); // Do the search over the list of databases for (int ti = 0; ti < tables.Count; ti++) { mt = tables[ti]; q = new Query(); // build basic query //q.SingleStepExecution = true; // do in single step (doesn't currently return sim score) q.ShowStereoComments = false; qt = new QueryTable(mt); q.AddQueryTable(qt); qt.SelectKeyOnly(); // start selecting desired cols keyMc = mt.KeyMetaColumn; structMc = mt.FirstStructureMetaColumn; structQc = qt.GetQueryColumnByName(structMc.Name); structQc.Selected = RetrieveStructures; dbSetMc = mt.DatabaseListMetaColumn; if (dbSetMc == null) { throw new Exception("\"Databases\" metacolumn not found for table: " + mt.Label); } QueryColumn dbSetQc = qt.GetQueryColumnByName(dbSetMc.Name); dbSetQc.Selected = true; // select the database name RootTable root = RootTable.GetFromTableName(mt.Name); txt = " in (" + root.Label + ")"; dbSetQc.Criteria = dbSetMc.Name + txt; dbSetQc.CriteriaDisplay = txt; simScoreMc = mt.SimilarityScoreMetaColumn; simScoreQc = null; if (simScoreMc != null) // get sim score if it exists { simScoreQc = qt.GetQueryColumnByName(simScoreMc.Name); simScoreQc.Selected = true; // return sim score } Psc.QueryColumn = structQc; ParsedStructureCriteria psc2 = AdjustSearchForSmallWorldAsNeeded(Psc); psc2.ConvertToQueryColumnCriteria(structQc); // format the QC for the structure search DateTime t0 = DateTime.Now; //QueryEngine qe = new QueryEngine(); //qe.NextRowsMin = 1000; // minimum number of rows to prefetch //qe.NextRowsMax = -1; // maximum number of rows to prefetch //qe.NextRowsMaxTime = 10000; // max time in milliseconds for next fetch //qe.ExecuteQuery(q); qm = new QueryManager(); try { dlgRslt = qm.ExecuteQuery(ref q); } catch (Exception ex) { WriteToLog("Error searching structure: " + ex.Message + ", " + queryCount.ToString() + ", " + qid); continue; } if (dlgRslt != DialogResult.OK) { return(dlgRslt); } double executeTime = TimeOfDay.Delta(ref t0); int offset = qm.DataTableManager.KeyValueVoPos + 1; //int offset = 0; // for QE int keyPos = offset++; int strPos = RetrieveStructures ? offset++ : -1; int dbPos = offset++; int simPos = offset++; int fetchCnt = 0; while (true) { dr = qm.NextRow(); //dr = qe.NextRow(); // for Qe if (dr == null) { break; } fetchCnt++; if (fetchCnt == 1) { matchAtLeastOneCount++; // number of queries that have at least one match } MatchData md = new MatchData(); md.Qno = queryCount; md.Qid = qid; if (RetrieveStructures) { md.Qstr = "Chime=" + queryStructure.GetChimeString(); } CompoundId cid = CompoundId.ConvertTo(dr[keyPos]); md.Mid = cid.Value; if (RetrieveStructures) { MoleculeMx ms = MoleculeMx.ConvertTo(dr[strPos]); if (!NullValue.IsNull(ms)) { md.Mstr = "Chime=" + ms.GetChimeString(); } } StringMx db = StringMx.ConvertTo(dr[dbPos]); if (!NullValue.IsNull(db)) { md.Db = db.Value; } if (Psc.SearchType == StructureSearchType.MolSim) { NumberMx nex = NumberMx.ConvertTo(dr[simPos]); if (!NullValue.IsNull(nex)) { md.Score = nex.Value; } } if (matchList.Contains(cid.Value)) // already have compound id as match for other query? { if (Psc.SearchType != StructureSearchType.MolSim) { continue; // if similarity search see if more similar } CidListElement le = matchList.Get(cid.Value); // reference current score if (le.Tag > md.Score) { continue; // only replace if more similar } matchList.Remove(le.Cid); // remove from list for (int mi = 0; mi < matchData.Count; mi++) // remove from data { if (matchData[mi].Mid == md.Mid) { matchData.RemoveAt(mi); break; } } } matchList.Add(md.Mid); matchList.Get(md.Mid).Tag = md.Score; // keep score in list matchData.Add(md); // add to results txt = md.Qid + ", " + md.Mid + ", " + md.Score.ToString(); WriteToLog(txt); } // Fetch result loop double fetchTime = TimeOfDay.Delta(ref t0); } // DB loop if (Progress.CancelRequested) { Progress.Hide(); MessageBoxMx.ShowError("Search cancelled."); try { LogStream.Close(); } catch { } return(DialogResult.Cancel); } } // key loop CidListCommand.WriteCurrentList(matchList); // write the list of numbers UsageDao.LogEvent("MultipleStructSearch"); txt = "=== Multiple structure search complete ===\r\n\r\n" + "Structures Searched: " + queryCount.ToString() + "\r\n"; txt += "Structures with one or more matches: " + matchAtLeastOneCount.ToString() + "\r\n" + "Total Matches: " + matchList.Count.ToString() + "\r\n"; TimeSpan ts = DateTime.Now.Subtract(startTime); ts = new TimeSpan(ts.Hours, ts.Minutes, ts.Seconds); txt += "Total Time: " + ts + "\r\n\r\n"; WriteToLog("\r\n" + txt); try { LogStream.Close(); } catch { } if (matchList.Count == 0) { Progress.Hide(); MessageBoxMx.ShowError("No matches have been found."); return(DialogResult.Cancel); } tok = "Matching compound ids"; if (Psc.SearchType == StructureSearchType.MolSim) { tok = "Similar compound ids"; } txt += tok + " have been written to the current list: " + ResultsUo.Name + "\n" + "Log file written to: " + logFileName + "\n\n" + "Do you want to view the match results?"; DialogResult dRslt = MessageBoxMx.Show(txt, "Multiple Structure Search", MessageBoxButtons.YesNoCancel, MessageBoxIcon.Question); if (dRslt == DialogResult.Cancel) { return(DialogResult.Cancel); } else if (dRslt == DialogResult.No) // show log { SystemUtil.StartProcess(logFileName); return(DialogResult.Cancel); } // Display results Progress.Show("Formatting results..."); mt = new MetaTable(); mt.Name = "MULTSTRUCTSEARCH_" + SearchCount; mt.Label = "Multiple Structure Search " + SearchCount; mt.TableMap = "Mobius.Tools.MultStructSearch"; // plugin id mt.MetaBrokerType = MetaBrokerType.NoSql; ColumnSelectionEnum structureColumnSelection = RetrieveStructures ? ColumnSelectionEnum.Selected : ColumnSelectionEnum.Unselected; keyMc = keyMc.Clone(); keyMc.Name = "MatchingCid"; keyMc.Label = "Matching Compound Id"; mt.AddMetaColumn(keyMc); structMc = structMc.Clone(); structMc.Name = "MatchingStructure"; structMc.Label = "Matching Structure"; structMc.InitialSelection = structureColumnSelection; mt.AddMetaColumn(structMc); dbSetMc = dbSetMc.Clone(); dbSetMc.Name = "Database"; mt.AddMetaColumn(dbSetMc); //if (DatabasesToSearch.Text.Contains(",")) dbSetMc.InitialSelection = ColumnSelectionEnum.Selected; mc = mt.AddMetaColumn("Molsimilarity", "Similarity Search Score", MetaColumnType.Number, ColumnSelectionEnum.Unselected, 10); if (Psc.SearchType == StructureSearchType.MolSim) { mc.InitialSelection = ColumnSelectionEnum.Selected; } mc = mt.AddMetaColumn("QueryNo", "Query Number", MetaColumnType.Integer); //mc = mt.AddMetaColumn("QueryMatchNo", "Query Match Number", MetaColumnType.Integer); mc = mt.AddMetaColumn("QueryId", "Query Id", MetaColumnType.String); mc = mt.AddMetaColumn("QueryStructure", "Query Structure", MetaColumnType.Structure); mc.InitialSelection = structureColumnSelection; q = ToolHelper.InitEmbeddedDataToolQuery(mt); dt = q.ResultsDataTable as DataTableMx; for (int mi = 0; mi < matchData.Count; mi++) { MatchData md = matchData[mi]; dr = dt.NewRow(); dr[qt.Alias + ".MatchingCid"] = new CompoundId(md.Mid); if (RetrieveStructures) { dr[qt.Alias + ".MatchingStructure"] = new MoleculeMx(MoleculeFormat.Chime, md.Mstr); } dr[qt.Alias + ".Database"] = new StringMx(md.Db); if (Psc.SearchType == StructureSearchType.MolSim) { dr[qt.Alias + ".Molsimilarity"] = new NumberMx(md.Score); } dr[qt.Alias + ".QueryNo"] = new NumberMx(md.Qno); dr[qt.Alias + ".QueryId"] = new StringMx(md.Qid); if (RetrieveStructures) { dr[qt.Alias + ".QueryStructure"] = new MoleculeMx(MoleculeFormat.Chime, md.Qstr); } dt.Rows.Add(dr); } ToolHelper.DisplayData(q, dt, true); Progress.Hide(); return(DialogResult.OK); }
/// <summary> /// Sync the Mobius CorpMoltable replicate used to retrieve Smiles /// Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>] /// </summary> /// <returns></returns> public static string UpdateCorpDbMoltableMx( string args) { DateTime moleculeDateTime = DateTime.MinValue; double mw; string msg = "", sql = "", maxCorpIdSql, mf = "", chime = "", smiles = "", checkPointDate = "", helm = "", sequence = "", svg = ""; object[][] pva = null; int pvaCount = 0, CorpId, lowCorpId = 0, highCorpId = 0, srcMaxCorpId = 0; int SelectChunkSize = 20; // small chunks int InsertBufferSize = 10; //int SelectChunkSize = 100000; // big chunks //int InsertBufferSize = 1000; // Select data from corp_moltable by CorpId range const string SelectByCorpIdRange = @" SELECT m.corp_nbr, chime(m.ctab), m.molformula, m.molweight, null molsmiles, s.helm_txt, s.sequence_txt, m.molecule_date FROM corp_owner.corp_moltable m, corp_owner.corp_substance s where m.corp_nbr > 0 and s.corp_nbr = m.corp_nbr and (s.status_code is null or s.status_code = 'A') ORDER BY corp_nbr"; // Select data from corp_moltable by date range comparing to corp_moltable_mx const string SelectByDateRange = @" select m.corp_nbr, chime(m.ctab), m.molformula, m.molweight, null molsmiles, s.helm_txt, s.sequence_txt, m.molecule_date, m2.molecule_date from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx m2 where m.molecule_date > to_date('1-jan-1900 000000','DD-MON-YYYY HH24MISS') and s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and m2.corp_nbr (+) = m.corp_nbr and m2.molecule_date (+) != m.molecule_date order by m.molecule_date" ; // Select for missing smiles strings, ex: Update CorpDbMoltableMx LoadMissing mx.molecule_date > '1-jan-2014' const string SelectMissingSmilesFix = @" select /* check for CorpIds in corp_moltable not in corp_moltable_mx */ corp_nbr, chime(ctab), molformula, molweight, null molsmiles, helm_txt, sequence_txt, molecule_date from ( select m.*, s.helm_txt, s.sequence_txt, mx.molsmiles molsmiles_mx from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx mx where s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and mx.corp_nbr (+) = m.corp_nbr and 1=1 /* condition to substitute */ ) m where molsmiles_mx is null /* extra condition */ order by corp_nbr" ; // Insert missing helm info const string SelectMissingHelmFix = @" select /* check for CorpIds in corp_moltable not in corp_moltable_mx */ corp_nbr, chime(ctab), molformula, molweight, null molsmiles, helm_txt, sequence_txt, molecule_date from ( select m.*, s.helm_txt, s.sequence_txt, mx.molsmiles molsmiles_mx from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx mx where s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and mx.corp_nbr (+) = m.corp_nbr and 1=1 /* condition to substitute */ ) m where length(helm_txt) > 0 /* extra condition */ order by corp_nbr" ; // Secondary "large" structure table (~5k mols) const string SelectLargeMols = @" select corp_nbr, to_clob(molstructure), to_clob(molformula), molweight, molsmiles, null helm_txt, null sequence_txt, molecule_date from (select corp_srl_nbr corp_nbr, 'CompoundId=' || corp_srl_nbr molstructure, null ctab, mlclr_frml_txt molformula, mlclr_wgt molweight, null molsmiles, null molecule_date from rdm_owner.rdm_sbstnc where rdw_src_cd = 'LRG'" ; // Insert statement const string InsertSql = @" insert into mbs_owner.corp_moltable_mx ( corp_nbr, molstructure, molformula, molweight, molsmiles, molecule_date) values (:0, :1, :2, :3, :4, :5)" ; // Build select sql bool byDateRange = false, byCorpIdRange = false, missingFix = true, deleteExisting = true; string missingFixCriteria = ""; if (Lex.IsUndefined(args) || Lex.Eq(args, "ByDateRange")) { byDateRange = true; } else if (Lex.Eq(args, "ByCorpIdRange")) { byCorpIdRange = true; Progress.Show("Getting range of CorpIds to insert..."); maxCorpIdSql = "select max(corp_nbr) from corp_owner.corp_moltable"; // get highest CorpId in source db srcMaxCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql); if (srcMaxCorpId < 0) { srcMaxCorpId = 0; } maxCorpIdSql = "select max(corp_nbr) from mbs_owner.corp_moltable_mx"; // get highest CorpId in dest db highCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql); if (highCorpId < 0) { highCorpId = 0; } } else if (Lex.StartsWith(args, "LoadMissing")) { missingFix = true; if (args.Contains(" ")) { missingFixCriteria = args.Substring(10).Trim(); } } else if (int.TryParse(args, out srcMaxCorpId)) // single CorpId { byCorpIdRange = true; highCorpId = srcMaxCorpId - 1; // say 1 less is the max we have } else { return("Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]"); } Log("UpdateCorpDbMoltableMx started: " + args); int readCount = 0, insCount = 0, insertCount = 0, updateCount = 0, undefinedStructures = 0, smilesSuccess = 0, smilesFails = 0, helmStructures = 0; List <string> CorpIdList = new List <string>(); for (int chunk = 1; ; chunk++) // loop over chunks { if (byDateRange) // single chunk { if (chunk > 1) { break; // break 2nd time through } checkPointDate = UserObjectDao.GetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", "01-sep-2013 000000"); //UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate); sql = Lex.Replace(SelectByDateRange, "1-jan-1900 000000", checkPointDate); msg = "Reading where date >= " + checkPointDate; } else if (byCorpIdRange) // by CorpId range { if (highCorpId >= srcMaxCorpId) { break; // done } lowCorpId = highCorpId + 1; // start of next chunk highCorpId = lowCorpId + SelectChunkSize; if (highCorpId >= srcMaxCorpId) { highCorpId = srcMaxCorpId; } sql = Lex.Replace(SelectByCorpIdRange, "corp_nbr > 0", "corp_nbr between " + lowCorpId + " and " + highCorpId); msg = "Reading: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount; } else if (missingFix) { if (chunk > 1) { break; // break 2nd time through } sql = SelectMissingHelmFix; if (Lex.IsDefined(missingFixCriteria)) // substitute any criteria { sql = Lex.Replace(sql, "1=1", missingFixCriteria); } msg = "Fixing missing data"; } Progress.Show(msg); DbCommandMx readCmd = new DbCommandMx(); readCmd.MxConn = DbConnectionMx.GetConnection("prd123"); readCmd.PrepareUsingDefinedConnection(sql, null); DbDataReader rdr = readCmd.ExecuteReader(); DbCommandMx insertCmd = new DbCommandMx(); OracleDbType[] pta = new OracleDbType[6]; pta[0] = OracleDbType.Int32; // corp_nbr pta[1] = OracleDbType.Clob; // molstructure pta[2] = OracleDbType.Clob; // molformula pta[3] = OracleDbType.Double; // molweight pta[4] = OracleDbType.Clob; // smiles pta[5] = OracleDbType.Date; // molecule_date insertCmd.Prepare(InsertSql, pta); insertCmd.BeginTransaction(); // be sure we have a transaction going pva = DbCommandMx.NewObjectArrayArray(6, InsertBufferSize); // alloc insert row array object[] vo = new object[6]; while (true) { bool readOk = rdr.Read(); if (readOk) { rdr.GetValues(vo); CorpId = readCmd.GetInt(0); // corp_nbr vo[0] = CorpId; CorpIdList.Add(CorpId.ToString()); if (!readCmd.IsNull(1)) // molstructure { chime = readCmd.GetClob(1); chime = OracleMx.ClearStringIfExceedsMaxStringSize(chime); vo[1] = chime; } else { chime = ""; } if (!readCmd.IsNull(2)) // molformula { mf = readCmd.GetClob(2); mf = OracleMx.ClearStringIfExceedsMaxStringSize(mf); vo[2] = mf; } if (!readCmd.IsNull(3)) // molweight { mw = readCmd.GetDouble(3); vo[3] = mw; } if (Lex.IsDefined(chime)) // molsmiles - calculate from chime string { MoleculeMx cs = new MoleculeMx(MoleculeFormat.Chime, chime); if (cs.AtomCount > 1) // need more than one atom { MoleculeMx cs2 = cs.ConvertTo(MoleculeFormat.Smiles); smiles = cs2.GetSmilesString(); if (Lex.IsDefined(smiles)) { smilesSuccess++; } else { Log("Smiles conversion failure for CorpId: " + CorpId); smilesFails++; } smiles = OracleMx.ClearStringIfExceedsMaxStringSize(smiles); vo[4] = smiles; } else { undefinedStructures++; } } else { undefinedStructures++; } if (!readCmd.IsNull(5)) { helm = readCmd.GetClob(5); if (Lex.IsDefined(helm)) { svg = HelmControl.GetSvg(helm); vo[1] = SvgUtil.CompressSvgString(svg); // store compressed svg in molstructure column for now helmStructures++; } } if (!readCmd.IsNull(6)) { sequence = readCmd.GetClob(6); if (Lex.IsDefined(sequence)) { // nothing yet } } moleculeDateTime = DateTime.MinValue; if (!readCmd.IsNull(7)) // molecule_date { moleculeDateTime = readCmd.GetDateTime(7); vo[5] = moleculeDateTime; } for (int pi = 0; pi < 6; pi++) // invert for insert { pva[pi][pvaCount] = vo[pi]; } if (Debug) { msg = String.Format("CorpId: {0}, mf: {1}, chime: {2}, smiles: {3}", CorpId.ToString(), mf.Length, chime.Length, smiles.Length); Log(msg); } pvaCount++; } if (pvaCount >= InsertBufferSize || (!readOk && pvaCount > 0)) // write if buffer full or at end { try { if (deleteExisting) { int delCount = DoDeletes(CorpIdList); updateCount += delCount; // count deletes as updates insertCount -= delCount; // subtract from inserts } CorpIdList.Clear(); insCount = insertCmd.ExecuteArrayNonReader(pva, ref pvaCount); insertCmd.Commit(); insertCmd.BeginTransaction(); insertCount += insCount; } catch (Exception ex) { throw new Exception(ex.Message, ex); } if (byDateRange) { string checkPointDate2 = String.Format("{0:dd-MMM-yyyy HHmmss}", moleculeDateTime); // format date time that will work with oracle UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate2); msg = "Processing where date >= " + checkPointDate + ", Reads: " + readCount + ", Inserts: " + insertCount + ", Updates: " + updateCount; } else if (byCorpIdRange) // CorpId range { msg = "Processing: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount; } else if (missingFix) { msg = "Fixing missing smiles, Updates: " + updateCount; } msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures); Progress.Show(msg); } if (!readOk) { break; } readCount++; } readCmd.Dispose(); insertCmd.Dispose(); } // end for select chunk msg = "UpdateCorpDbMoltableMx - Inserts: " + insertCount + ", Updates: " + updateCount; msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures); Log(msg); return(msg); }