Beispiel #1
0
        /// <summary>
        /// Read a sample of a table for previewing
        /// </summary>
        /// <returns></returns>

        List <string> GetPreviewSubset()
        {
            Dictionary <string, object> PreviewSubset = new Dictionary <string, object>();
            MetaTable mt  = Eqp.QueryTable.MetaTable;
            string    sql = "select /*+ first_rows */" + mt.KeyMetaColumn.ColumnMap + " from " +
                            GetSourceWithTableFilterCriteria(mt);
            DbCommandMx drd = new DbCommandMx();

            drd.Prepare(sql);
            drd.ExecuteReader();
            while (drd.Read())
            {
                if (drd.IsNull(0))
                {
                    continue;
                }
                string id  = drd.GetObject(0).ToString();
                string cid = CompoundId.Normalize(id);                 // normalize cid adding prefix as needed
                PreviewSubset[id] = null;
                if (PreviewSubset.Count >= 100)
                {
                    break;
                }
            }
            drd.Dispose();

            return(new List <string>(PreviewSubset.Keys));
        }
Beispiel #2
0
/// <summary>
/// Retrieve theoretical elemental composition value from Cassper database
/// </summary>
/// <param name="args"></param>
/// <returns></returns>

        public object GetElementalAnalysisPct(
            string cassperId,
            string aSymbol)
        {
            DbCommandMx drd = new DbCommandMx();
            string      sql =
                "select comp_pct " +
                "from cas_owner.cas_structure " +
                "where cassper_id = :0";

            drd.PrepareMultipleParameter(sql, 1);
            drd.ExecuteReader(cassperId);
            if (!drd.Read() || drd.IsNull(0))
            {
                drd.Dispose();
                return(null);
            }

            string s = drd.GetString(0);

            drd.Dispose();

            int i1 = s.ToLower().IndexOf(aSymbol.ToLower());             // find atom symbol

            if (i1 < 0)
            {
                return(null);
            }
            s  = s.Substring(i1 + 2);        // get following number
            i1 = s.IndexOf(" ");             // and any following space
            if (i1 >= 0)
            {
                s = s.Substring(0, i1);
            }
            return(Double.Parse(s));
        }
Beispiel #3
0
        static ICdkMol CdkMolUtil => StaticCdkMol.I;         // static molecule shortcut for utility methods

        /// <summary>
        /// UpdateCorpFingerprintDatabaseMx
        /// ///////////////////////////////////////////////////////
        /// Syntax: Update FingerprintDatabaseMx [Corp | ChEMBL] [MACCS | ECFP4] [Load | ByCidRange | SinceLastCheckpoint | LoadMissing | <SingleCorpId>]
        ///
        /// Corp Examples:
        ///    Update FingerprintDatabaseMx Corp MACCS Load
        ///    Update FingerprintDatabaseMx Corp MACCS LoadMissing
        ///    Update FingerprintDatabaseMx Corp MACCS SinceLastCheckpoint
        ///
        ///    Update FingerprintDatabaseMx Corp ECFP4 Load
        ///    Update FingerprintDatabaseMx Corp ECFP4 LoadMissing
        ///    Update FingerprintDatabaseMx Corp ECFP4 SinceLastCheckpoint
        ///
        /// ChEMBL Examples:
        ///    Update FingerprintDatabaseMx Chembl MACCS Load
        ///    Update FingerprintDatabaseMx Chembl MACCS LoadMissing
        ///
        ///    Update FingerprintDatabaseMx Chembl ECFP4 Load
        ///    Update FingerprintDatabaseMx Chembl ECFP4 LoadMissing
        /// ///////////////////////////////////////////////////////
        /// </summary>
        /// <param name="argString"></param>
        /// <returns></returns>

        static public string Update(
            string argString)
        {
            MoleculeMx mol;
            double     mw;
            string     chime, smiles, molString, molFile = "";
            string     msg = "", sql = "", chemblId, cid = "", maxCorpIdSql, maxIdSql2, mf, missingFixCriteria = "", CorpIdList = "";
            int        storeChunkCount = 0, CorpId, molregno, molId, lowId = 0, highId = 0, maxDestId = 0, maxSrcId = 0;
            int        readCount = 0, storeCount = 0;

            ByCheckpoint   = ByCidRange = ByCidList = LoadIfMissing = false;
            ReadChunkSize  = DefaultReadChunkSize;
            WriteChunkSize = DefaultWriteChunkSize;

            Failures = new Dictionary <string, string>();
            NewUndefinedStructureCids = new List <string>();

            LastFailure  = "";
            FailureCount = 0;

            // global try loop

            try
            {
                ////////////////////////
                /// Parse Parameters ///
                ////////////////////////

                // See which database

                argString = argString.Trim();
                if (Lex.StartsWith(argString, "Corp"))
                {
                    Database  = "Corp";
                    argString = argString.Substring(5).Trim();
                }

                else if (Lex.StartsWith(argString, "Chembl"))
                {
                    Database  = "ChEMBL";
                    argString = argString.Substring(6).Trim();
                }

                else
                {
                    return(SyntaxMsg);
                }

                // See which fingerprint type

                FingerprintType = FingerprintType.MACCS;                 // default to MACCS if type not defined

                if (Lex.TryReplace(ref argString, "MACCS", ""))
                {
                    FingerprintType = FingerprintType.MACCS;
                    argString       = argString.Trim();
                }

                else if (Lex.TryReplace(ref argString, "ECFP4", ""))
                {
                    FingerprintType = FingerprintType.Circular;                     // (i.e. ECFP4)
                    argString       = argString.Trim();
                }

                FpDao = new FingerprintDao(Database, FingerprintType);
                List <FingerprintRec> fpRecList = new List <FingerprintRec>();

                string args = argString.Trim();

                string initialMsg = "Update FingerprintDatabase started: " + args;

                CidList = new List <string>();                // init empty list

                //////////////////////
                /// Corp Database ///
                //////////////////////

                if (CorpDatabase)
                {
                    if (Lex.Eq(args, "Load"))
                    {
                        ByCidRange = true;

                        ShowProgress("Getting range of CorpIds to insert...");

                        maxCorpIdSql = SelectMaxCorpId;                         // get highest id in source db
                        maxSrcId     = SelectSingleValueDao.SelectInt(maxCorpIdSql);
                        if (maxSrcId < 0)
                        {
                            maxSrcId = 0;
                        }

                        maxDestId = GetMaxDestMolId();

                        //maxIdSql2 = "select max(src_compound_id_nbr) from dev_mbs_owner.corp_uc_xref where src_id = 0"; // get highest id in UniChemDb db
                        //highCorpId = SelectSingleValueDao.SelectInt(maxIdSql2);

                        if (maxDestId < 0)
                        {
                            maxDestId = 0;
                        }
                    }

                    else if (Lex.Eq(args, "SinceLastCheckpoint"))
                    {
                        ByCheckpoint = true;

                        ShowProgress("Getting list of CorpIds updated since last checkpoint...");

                        CidList = GetNewAndModifiedCorpIdList(out CidUpdateDateDict);

                        //CidUpdateList = new List<string>(); // debug with single cmpd
                        //CidUpdateList.Add("03435269");

                        if (CidList.Count == 0)
                        {
                            return("There have been no updates since the last checkpoint");
                        }

                        initialMsg += ", CorpIds to add/update: " + CidList.Count;
                    }

                    else if (Lex.StartsWith(args, "ByCorpIdList"))
                    {
                        ByCidList  = true;
                        CorpIdList = args.Substring("ByCorpIdList".Length).Trim();
                        if (Lex.IsUndefined(CorpIdList))
                        {
                            throw new Exception("Undefined CorpId list");
                        }
                    }

                    else if (Lex.StartsWith(args, "LoadMissing"))
                    {
                        LoadIfMissing = true;
                        if (args.Contains(" "))
                        {
                            missingFixCriteria = args.Substring("LoadMissing".Length).Trim();
                        }

                        ShowProgress("Getting list of missing CorpIds...");
                        CidList = GetMissingCidList();
                        if (CidList.Count == 0)
                        {
                            return("There are no missing CorpIds");
                        }
                        initialMsg += ", Missing CorpIds: " + CidList.Count;
                    }

                    else if (int.TryParse(args, out maxSrcId))                     // single CorpId
                    {
                        ByCidRange = true;
                        maxDestId  = maxSrcId - 1;                        // say 1 less is the max we have
                    }

                    else
                    {
                        return(SyntaxMsg);
                    }
                }

                ///////////////////////
                /// ChEMBL Database ///
                ///////////////////////

                else if (ChemblDatabase)
                {
                    if (Lex.Eq(args, "Load"))
                    {
                        ByCidRange = true;

                        ShowProgress("Getting range of MolRegNos to insert...");

                        sql      = "select max(molregno) from chembl_owner.compound_struct_xxxxxx";
                        maxSrcId = SelectSingleValueDao.SelectInt(sql);
                        if (maxSrcId < 0)
                        {
                            maxSrcId = 0;
                        }

                        maxDestId = GetMaxDestMolId();
                        if (maxDestId < 0)
                        {
                            maxDestId = 0;
                        }
                    }

                    else if (Lex.StartsWith(args, "LoadMissing"))
                    {
                        LoadIfMissing = true;
                        ShowProgress("Getting list of missing ChEMBL Ids...");
                        CidList = GetMissingCidList();
                        if (CidList.Count == 0)
                        {
                            return("There are no missing Ids");
                        }
                        initialMsg += ", Missing Chembl Ids: " + CidList.Count;
                    }

                    else
                    {
                        return(SyntaxMsg);
                    }
                }

                else
                {
                    return(SyntaxMsg);
                }

                CidListOriginalCount = CidList.Count;

                Log(initialMsg);

                /////////////////////////////
                // Loop over chunks of data
                /////////////////////////////

                for (int chunk = 1; ; chunk++)
                {
                    //////////////////////
                    /// Corp Database ///
                    //////////////////////

                    if (CorpDatabase)
                    {
                        if (ByCheckpoint)                         // single chunk
                        {
                            string cidList = GetNextListChunk();
                            if (Lex.IsUndefined(cidList))
                            {
                                break;
                            }

                            sql = SelectByCorpIdCriteria;
                            sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + cidList + ")");
                            string matchString = "order by m.corp_nbr";
                            if (!Lex.Contains(sql, matchString))
                            {
                                throw new Exception(matchString + " not found");
                            }
                            sql = Lex.Replace(sql, matchString, "order by m.molecule_date");

                            msg = "Processing " + CidListOriginalCount + " updates since " + CheckpointDateTime;
                            // + " (" + Mobius.Data.CidList.FormatCidListForDisplay(null, chunkCidList) + ")";
                        }

                        else if (ByCidRange)                         // by CorpId range
                        {
                            if (maxDestId >= maxSrcId)
                            {
                                break;                                         // done
                            }
                            lowId     = maxDestId + 1;                         // start of next chunk
                            highId    = lowId + ReadChunkSize;
                            maxDestId = highId;

                            //lowCorpId = highCorpId = 12345; // debug

                            if (highId >= maxSrcId)
                            {
                                highId = maxSrcId;
                            }
                            sql = SelectByCorpIdCriteria;
                            sql = Lex.Replace(sql, "<CorpIdCriteria>", "between " + lowId + " and " + highId);

                            msg = "Processing CorpId range: " + lowId + " to " + highId;
                        }

                        else if (ByCidList)                         // by single user-supplied CorpId list
                        {
                            if (chunk > 1)
                            {
                                break;                                        // break 2nd time through
                            }
                            sql = SelectByCorpIdCriteria;
                            sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + CorpIdList + ")");
                            msg = "Processing CorpId list: " + CorpIdList;
                        }

                        else if (LoadIfMissing)
                        {
                            string cidList = GetNextListChunk();
                            if (Lex.IsUndefined(cidList))
                            {
                                break;                                                       // all done
                            }
                            sql = SelectByCorpIdCriteria;
                            sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + cidList + ")");

                            msg = "Processing missing CorpId Chunk: " + Mobius.Data.CidList.FormatAbbreviatedCidListForDisplay(null, cidList) +
                                  ", Total Ids: " + CidListOriginalCount;

                            Log(msg);
                        }

                        else
                        {
                            return(SyntaxMsg);
                        }
                    }

                    ///////////////////////
                    /// ChEMBL Database ///
                    ///////////////////////

                    else if (ChemblDatabase)
                    {
                        if (ByCidRange)                         // by CID range
                        {
                            if (maxDestId >= maxSrcId)
                            {
                                break;                                         // done
                            }
                            lowId     = maxDestId + 1;                         // start of next chunk
                            highId    = lowId + ReadChunkSize;
                            maxDestId = highId;

                            //lowId =  highId = 12345; // debug

                            if (maxDestId >= maxSrcId)
                            {
                                maxDestId = maxSrcId;
                            }
                            sql = SelectChemblSql;
                            sql = Lex.Replace(sql, "<molregnoCriteria>", "between " + lowId + " and " + highId);

                            msg = "Processing ChEMBL MolRegNo range: " + lowId + " to " + highId;
                        }

                        else if (LoadIfMissing)
                        {
                            string cidList = GetNextListChunk();
                            if (Lex.IsUndefined(cidList))
                            {
                                break;                                                       // all done
                            }
                            sql = SelectByCorpIdCriteria;
                            sql = Lex.Replace(sql, "<CorpIdCriteria>", "in (" + cidList + ")");

                            msg = "Processing missing ChEMBL Id Chunk: " + Mobius.Data.CidList.FormatAbbreviatedCidListForDisplay(null, cidList) +
                                  ", Total Ids: " + CidListOriginalCount;
                        }

                        else
                        {
                            return(SyntaxMsg);
                        }
                    }

                    else
                    {
                        return(SyntaxMsg);
                    }

                    ShowProgress(msg);

                    // Execute the SQL to get the rows for the chunk

                    DbCommandMx rdr = DbCommandMx.PrepareAndExecuteReader(sql);
                    DateTime    lastShowProgressTime = DateTime.MinValue;

                    ///////////////////////////////////////////
                    /// Loop over rows in the current chunk ///
                    ///////////////////////////////////////////

                    while (true)
                    {
                        // Update progress display

                        if (DateTime.Now.Subtract(lastShowProgressTime).TotalSeconds > 1)                         // show progress
                        {
                            int storeTotalCount = storeCount + storeChunkCount;

                            string msg2 = msg + "\r\n" +
                                          "Reads: " + readCount + "\r\n" +
                                          "Undefined: " + NewUndefinedStructureCids.Count + "\r\n" +
                                          "Insert/Updates: " + storeTotalCount + "\r\n" +
                                          "Failures: " + FailureCount + "\r\n" +
                                          "Failure Types: " + Failures.Count + "\r\n" +
                                          "Last Failure: " + LastFailure;

                            ShowProgress(msg2);
                            lastShowProgressTime = DateTime.Now;
                        }

                        // Read and process next compound

                        bool readOk = rdr.Read();

                        if (readOk)
                        {
                            readCount++;

                            try
                            {
                                double   t1 = 0, t2 = 0, t3 = 0, t4 = 0;
                                DateTime t0 = DateTime.Now;
                                mol = null;
                                //t2 = TimeOfDay.Delta(ref t0);

                                //////////////////////
                                /// Corp Database ///
                                //////////////////////

                                if (CorpDatabase)
                                {
                                    CorpId = rdr.GetInt(0);                                     // corp_nbr
                                    //Log("CorpId: " + CorpId); // debug
                                    molId = CorpId;
                                    cid   = CorpId.ToString();
                                    cid   = CompoundId.NormalizeForDatabase(cid);

                                    if (!rdr.IsNull(1))                                     // be sure chime field isn't null
                                    {
                                        chime = rdr.GetClob(1);
                                        if (Lex.IsDefined(chime))
                                        {
                                            molFile = MoleculeMx.ChimeStringToMolfileString(chime);                                             // convert Chime to MolFile
                                            mol     = new MoleculeMx(MoleculeFormat.Molfile, molFile);
                                        }
                                    }

                                    MoleculeDateTime = rdr.GetDateTimeByName("Molecule_Date");                                     // Date molecule was updated in the CorpDB cartridge DB
                                }

                                ///////////////////////
                                /// ChEMBL Database ///
                                ///////////////////////

                                else                                 // chembl
                                {
                                    molId  = molregno = rdr.GetInt(0);
                                    cid    = chemblId = rdr.GetString(1);
                                    smiles = rdr.GetString(2);
                                    if (Lex.IsDefined(smiles))
                                    {
                                        mol = new MoleculeMx(MoleculeFormat.Smiles, smiles);
                                    }
                                }

                                if (MoleculeMx.IsUndefined(mol) || mol.AtomCount <= 1)
                                {
                                    NewUndefinedStructureCids.Add(cid);
                                    continue;
                                    //mol = new AtomContainer(); // write empty structure
                                }

                                bool includeOverallFingerprint = true;
                                List <BitSetFingerprint> fps   = CdkMol.BuildBitSetFingerprints(mol.MolfileString, includeOverallFingerprint, FingerprintType);

                                //t3 = TimeOfDay.Delta(ref t0);

                                foreach (BitSetFingerprint fp in fps)
                                {
                                    FingerprintRec fpr = new FingerprintRec();
                                    fpr.molId       = molId;
                                    fpr.SrcId       = SrcDbId;
                                    fpr.Cid         = cid;
                                    fpr.Cardinality = fp.cardinality();
                                    fpr.Fingerprint = fp.asBitSet().toLongArray();
                                    fpRecList.Add(fpr);
                                }

                                //t4 = TimeOfDay.Delta(ref t0);
                                t4 = t4;
                            }

                            catch (Exception ex)
                            {
                                if (!Failures.ContainsKey(ex.Message))
                                {
                                    Failures.Add(ex.Message, cid);
                                }

                                else
                                {
                                    Failures[ex.Message] += ", " + cid;
                                }

                                LastFailure = "Cid: " + cid + " - " + ex.Message;

                                Log(LastFailure);

                                //ShowProgress(ex.Message + "\r\n" + ex.StackTrace.ToString()); // debug

                                FailureCount++;

                                continue;
                            }

                            storeChunkCount++;
                        }

                        bool commitTransaction = (storeChunkCount >= WriteChunkSize || (!readOk && storeChunkCount > 0));
                        if (commitTransaction)                         // end of chunk of data to store?
                        {
                            // if updating by CheckPoint date range then merge existing data with new/updated data

                            if (ByCheckpoint)
                            {
                                if (readCount > 0 && (storeCount > 0 || FailureCount == 0))                                 // make sure not everything has failed)
                                {
                                    MergeRecordsIntoFiles(fpRecList);
                                }
                            }

                            // Simple append of records to files

                            else
                            {
                                FpDao.OpenWriters("bin", FileMode.Append);                                // open bin files for append

                                foreach (FingerprintRec fpr in fpRecList)                                 // write out buffered recs
                                {
                                    FpDao.WriteFingerprintRec(fpr);
                                }

                                FpDao.CloseWriters();

                                int cnt = fpRecList.Count;
                                if (cnt > 0)
                                {
                                    string cid1 = fpRecList[0].Cid;
                                    string cid2 = fpRecList[cnt - 1].Cid;
                                    Log("Records Appended: " + cnt + ", CIDS: " + cid1 + " - " + cid2);
                                }
                                else
                                {
                                    Log("Records Appended: 0");
                                }
                            }

                            fpRecList.Clear();

                            storeCount     += storeChunkCount;
                            storeChunkCount = 0;
                        }


                        if (!readOk)
                        {
                            break;
                        }
                    }                     // end of read loop for rows in a chunk

                    rdr.Dispose();
                }                 // end for loop of chunks

                DeleteTempFiles();

                if (LoadIfMissing)                 // update list of cids with missing structures
                {
                    ExistingUndefinedStructureCids.UnionWith(NewUndefinedStructureCids);
                    FpDao.WriteUndefinedStructuresCids(ExistingUndefinedStructureCids);
                }

                msg = "*** Update Complete ***\r\n\r\n" + msg;
                ShowProgress(msg);
                System.Threading.Thread.Sleep(100);

                string logMsg = "UpdateFingerprintDb - CIDs stored: " + storeCount + ", Undefined structures: " + NewUndefinedStructureCids.Count + ", failures: " + FailureCount + "\r\n";

                foreach (string key in Failures.Keys)
                {
                    logMsg += key + " - CIDs: " + Failures[key] + "\r\n";
                }

                Log(logMsg);

                return(logMsg);
            }             // end of main try loop

            catch (Exception ex)
            {
                Log(DebugLog.FormatExceptionMessage(ex));
                throw new Exception(ex.Message, ex);
            }
        }
Beispiel #4
0
/// <summary>
/// Retrieve any existing SVG for the list of supplied molecules
/// The Id column should contain the CorpId
/// </summary>
/// <param name="molList"></param>

        public static int SelectMoleculeListSvg(
            List <MoleculeMx> molList)
        {
            MoleculeMx mol;
            int        corpId, molSvgsFetchedCount = 0;
            string     corpIdString, molString, svg;

            const string sql = @"
		SELECT 
      corp_nbr,
      molstructure svgString
    FROM 
			mbs_owner.corp_moltable_mx
    WHERE 
      corp_nbr in (<list>)
			and molstructure is not null
		"        ;

            if (!Security.UserInfo.Privileges.CanRetrieveStructures)                     // structures allowed?
            {
                return(0);
            }

            //if (DebugMx.True) return 0; // debug, don't use existing values

            List <string> lsnList = new List <string>();
            Dictionary <string, MoleculeMx> molDict = new Dictionary <string, MoleculeMx>();

            foreach (MoleculeMx mol0 in molList)                     // set up a dict keyed by cid with mol values
            {
                if (mol0.PrimaryFormat != MoleculeFormat.Helm || Lex.IsUndefined(mol0.PrimaryValue))
                {
                    continue;
                }

                if (int.TryParse(mol0.Id, out corpId))
                {
                    molDict[mol0.Id] = mol0;
                }
            }

            if (molDict.Count == 0)
            {
                return(0);
            }

            DbCommandMx cmd = new DbCommandMx();

            cmd.PrepareListReader(sql, DbType.String);
            cmd.ExecuteListReader(new List <string>(molDict.Keys));

            while (cmd.Read())
            {
                corpId = cmd.GetInt(0);

                if (!cmd.IsNull(1))                         // molstructure
                {
                    molString = cmd.GetClob(1);

                    if (!SvgUtil.IsSvgString(molString))
                    {
                        continue;                                // skip if not SVG
                    }
                    svg = molString;                             // should be compressed format SVG

                    corpIdString = CompoundId.Normalize(corpId.ToString());

                    if (Lex.IsDefined(svg) && molDict.ContainsKey(corpIdString))
                    {
                        mol           = molDict[corpIdString];
                        mol.SvgString = svg;
                        molSvgsFetchedCount++;
                    }
                }
            }

            cmd.CloseReader();

            return(molSvgsFetchedCount);
        }
Beispiel #5
0
        /// <summary>
        /// Sync the Mobius CorpMoltable replicate used to retrieve Smiles
        /// Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]
        /// </summary>
        /// <returns></returns>

        public static string UpdateCorpDbMoltableMx(
            string args)
        {
            DateTime moleculeDateTime = DateTime.MinValue;
            double   mw;
            string   msg = "", sql = "", maxCorpIdSql, mf = "", chime = "", smiles = "", checkPointDate = "", helm = "", sequence = "", svg = "";

            object[][] pva = null;
            int        pvaCount = 0, CorpId, lowCorpId = 0, highCorpId = 0, srcMaxCorpId = 0;

            int SelectChunkSize  = 20;            // small chunks
            int InsertBufferSize = 10;

            //int SelectChunkSize = 100000; // big chunks
            //int InsertBufferSize = 1000;

            // Select data from corp_moltable by CorpId range

            const string SelectByCorpIdRange = @" 
		SELECT 
        m.corp_nbr,
        chime(m.ctab), 
        m.molformula,
        m.molweight,
        null molsmiles,
        s.helm_txt,
        s.sequence_txt,
        m.molecule_date
    FROM 
        corp_owner.corp_moltable m,
        corp_owner.corp_substance s
    where 
        m.corp_nbr > 0
        and s.corp_nbr = m.corp_nbr
        and (s.status_code is null or s.status_code = 'A')    
    ORDER BY corp_nbr";

            // Select data from corp_moltable by date range comparing to corp_moltable_mx

            const string SelectByDateRange = @"
			select
					m.corp_nbr,
					chime(m.ctab), 
					m.molformula,
					m.molweight,
          null molsmiles,
	        s.helm_txt,
					s.sequence_txt,
					m.molecule_date,
					m2.molecule_date
			from
					corp_owner.corp_moltable m,
					corp_owner.corp_substance s,
					corp_moltable_mx m2
			where
					m.molecule_date > to_date('1-jan-1900 000000','DD-MON-YYYY HH24MISS')
					and s.corp_nbr = M.CORP_NBR
					and (s.status_code is null or s.status_code = 'A')    
					and m2.corp_nbr (+) = m.corp_nbr
					and m2.molecule_date (+) != m.molecule_date
			order by m.molecule_date"            ;

            // Select for missing smiles strings, ex: Update CorpDbMoltableMx LoadMissing mx.molecule_date > '1-jan-2014'

            const string SelectMissingSmilesFix = @"
			select /* check for CorpIds in corp_moltable not in corp_moltable_mx */
					corp_nbr,
					chime(ctab), 
					molformula,
					molweight,
					null molsmiles,
					helm_txt,
          sequence_txt,
          molecule_date
			from 
					(
					select 
						m.*, 
						s.helm_txt,
						s.sequence_txt,
						mx.molsmiles molsmiles_mx
					from
					 corp_owner.corp_moltable m,
					 corp_owner.corp_substance s,
					 corp_moltable_mx mx
					where
					 s.corp_nbr = M.CORP_NBR
					 and (s.status_code is null or s.status_code = 'A')
					 and mx.corp_nbr (+) = m.corp_nbr
					 and 1=1 /* condition to substitute */
					) m
			where molsmiles_mx is null /* extra condition */
			order by corp_nbr"            ;

// Insert missing helm info

            const string SelectMissingHelmFix = @"
			select /* check for CorpIds in corp_moltable not in corp_moltable_mx */
					corp_nbr,
					chime(ctab), 
					molformula,
					molweight,
					null molsmiles,
					helm_txt,
          sequence_txt,
          molecule_date
			from 
					(
					select 
						m.*, 
						s.helm_txt,
						s.sequence_txt,
						mx.molsmiles molsmiles_mx
					from
					 corp_owner.corp_moltable m,
					 corp_owner.corp_substance s,
					 corp_moltable_mx mx
					where
					 s.corp_nbr = M.CORP_NBR
					 and (s.status_code is null or s.status_code = 'A')
					 and mx.corp_nbr (+) = m.corp_nbr
					 and 1=1 /* condition to substitute */
					) m
			where length(helm_txt) > 0 /* extra condition */
			order by corp_nbr"            ;

            // Secondary "large" structure table (~5k mols)

            const string SelectLargeMols = @"
			select 
				corp_nbr, 
				to_clob(molstructure), 
				to_clob(molformula), 
				molweight,
				molsmiles,
				null helm_txt,
				null sequence_txt,
				molecule_date
			from
			(select
				corp_srl_nbr corp_nbr,
				'CompoundId=' || corp_srl_nbr molstructure, 
				null ctab,
				mlclr_frml_txt molformula,
				mlclr_wgt molweight,
				null molsmiles,
				null molecule_date
				from rdm_owner.rdm_sbstnc 
				where rdw_src_cd = 'LRG'"                ;

// Insert statement

            const string InsertSql = @"
			insert into mbs_owner.corp_moltable_mx (
				corp_nbr,
				molstructure,
				molformula,
				molweight,
				molsmiles,
				molecule_date)
			values (:0, :1, :2, :3, :4, :5)"            ;

// Build select sql

            bool   byDateRange = false, byCorpIdRange = false, missingFix = true, deleteExisting = true;
            string missingFixCriteria = "";

            if (Lex.IsUndefined(args) || Lex.Eq(args, "ByDateRange"))
            {
                byDateRange = true;
            }

            else if (Lex.Eq(args, "ByCorpIdRange"))
            {
                byCorpIdRange = true;

                Progress.Show("Getting range of CorpIds to insert...");
                maxCorpIdSql = "select max(corp_nbr) from corp_owner.corp_moltable";                 // get highest CorpId in source db
                srcMaxCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql);
                if (srcMaxCorpId < 0)
                {
                    srcMaxCorpId = 0;
                }

                maxCorpIdSql = "select max(corp_nbr) from mbs_owner.corp_moltable_mx";                 // get highest CorpId in dest db
                highCorpId   = SelectSingleValueDao.SelectInt(maxCorpIdSql);
                if (highCorpId < 0)
                {
                    highCorpId = 0;
                }
            }

            else if (Lex.StartsWith(args, "LoadMissing"))
            {
                missingFix = true;
                if (args.Contains(" "))
                {
                    missingFixCriteria = args.Substring(10).Trim();
                }
            }

            else if (int.TryParse(args, out srcMaxCorpId))             // single CorpId
            {
                byCorpIdRange = true;
                highCorpId    = srcMaxCorpId - 1;              // say 1 less is the max we have
            }

            else
            {
                return("Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]");
            }

            Log("UpdateCorpDbMoltableMx started: " + args);

            int           readCount = 0, insCount = 0, insertCount = 0, updateCount = 0, undefinedStructures = 0, smilesSuccess = 0, smilesFails = 0, helmStructures = 0;
            List <string> CorpIdList = new List <string>();

            for (int chunk = 1; ; chunk++)       // loop over chunks
            {
                if (byDateRange)                 // single chunk
                {
                    if (chunk > 1)
                    {
                        break;                                // break 2nd time through
                    }
                    checkPointDate = UserObjectDao.GetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", "01-sep-2013 000000");

                    //UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate);

                    sql = Lex.Replace(SelectByDateRange, "1-jan-1900 000000", checkPointDate);

                    msg = "Reading where date >= " + checkPointDate;
                }

                else if (byCorpIdRange)                 // by CorpId range
                {
                    if (highCorpId >= srcMaxCorpId)
                    {
                        break;                                      // done
                    }
                    lowCorpId  = highCorpId + 1;                    // start of next chunk
                    highCorpId = lowCorpId + SelectChunkSize;
                    if (highCorpId >= srcMaxCorpId)
                    {
                        highCorpId = srcMaxCorpId;
                    }
                    sql = Lex.Replace(SelectByCorpIdRange, "corp_nbr > 0", "corp_nbr between " + lowCorpId + " and " + highCorpId);

                    msg = "Reading: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount;
                }

                else if (missingFix)
                {
                    if (chunk > 1)
                    {
                        break;                                // break 2nd time through
                    }
                    sql = SelectMissingHelmFix;
                    if (Lex.IsDefined(missingFixCriteria))                     // substitute any criteria
                    {
                        sql = Lex.Replace(sql, "1=1", missingFixCriteria);
                    }
                    msg = "Fixing missing data";
                }

                Progress.Show(msg);

                DbCommandMx readCmd = new DbCommandMx();
                readCmd.MxConn = DbConnectionMx.GetConnection("prd123");
                readCmd.PrepareUsingDefinedConnection(sql, null);
                DbDataReader rdr = readCmd.ExecuteReader();

                DbCommandMx insertCmd = new DbCommandMx();

                OracleDbType[] pta = new OracleDbType[6];
                pta[0] = OracleDbType.Int32;                // corp_nbr
                pta[1] = OracleDbType.Clob;                 // molstructure
                pta[2] = OracleDbType.Clob;                 // molformula
                pta[3] = OracleDbType.Double;               // molweight
                pta[4] = OracleDbType.Clob;                 // smiles
                pta[5] = OracleDbType.Date;                 // molecule_date

                insertCmd.Prepare(InsertSql, pta);
                insertCmd.BeginTransaction();                               // be sure we have a transaction going

                pva = DbCommandMx.NewObjectArrayArray(6, InsertBufferSize); // alloc insert row array
                object[] vo = new object[6];

                while (true)
                {
                    bool readOk = rdr.Read();

                    if (readOk)
                    {
                        rdr.GetValues(vo);

                        CorpId = readCmd.GetInt(0);                         // corp_nbr
                        vo[0]  = CorpId;
                        CorpIdList.Add(CorpId.ToString());

                        if (!readCmd.IsNull(1))                         // molstructure
                        {
                            chime = readCmd.GetClob(1);
                            chime = OracleMx.ClearStringIfExceedsMaxStringSize(chime);
                            vo[1] = chime;
                        }
                        else
                        {
                            chime = "";
                        }

                        if (!readCmd.IsNull(2))                         // molformula
                        {
                            mf    = readCmd.GetClob(2);
                            mf    = OracleMx.ClearStringIfExceedsMaxStringSize(mf);
                            vo[2] = mf;
                        }

                        if (!readCmd.IsNull(3))                         // molweight
                        {
                            mw    = readCmd.GetDouble(3);
                            vo[3] = mw;
                        }

                        if (Lex.IsDefined(chime))                         // molsmiles - calculate from chime string
                        {
                            MoleculeMx cs = new MoleculeMx(MoleculeFormat.Chime, chime);
                            if (cs.AtomCount > 1)                             // need more than one atom
                            {
                                MoleculeMx cs2 = cs.ConvertTo(MoleculeFormat.Smiles);
                                smiles = cs2.GetSmilesString();
                                if (Lex.IsDefined(smiles))
                                {
                                    smilesSuccess++;
                                }
                                else
                                {
                                    Log("Smiles conversion failure for CorpId: " + CorpId);
                                    smilesFails++;
                                }
                                smiles = OracleMx.ClearStringIfExceedsMaxStringSize(smiles);

                                vo[4] = smiles;
                            }
                            else
                            {
                                undefinedStructures++;
                            }
                        }
                        else
                        {
                            undefinedStructures++;
                        }

                        if (!readCmd.IsNull(5))
                        {
                            helm = readCmd.GetClob(5);
                            if (Lex.IsDefined(helm))
                            {
                                svg   = HelmControl.GetSvg(helm);
                                vo[1] = SvgUtil.CompressSvgString(svg);                                 // store compressed svg in molstructure column for now
                                helmStructures++;
                            }
                        }

                        if (!readCmd.IsNull(6))
                        {
                            sequence = readCmd.GetClob(6);
                            if (Lex.IsDefined(sequence))
                            {
                                // nothing yet
                            }
                        }

                        moleculeDateTime = DateTime.MinValue;
                        if (!readCmd.IsNull(7))                         // molecule_date
                        {
                            moleculeDateTime = readCmd.GetDateTime(7);
                            vo[5]            = moleculeDateTime;
                        }

                        for (int pi = 0; pi < 6; pi++)                         // invert for insert
                        {
                            pva[pi][pvaCount] = vo[pi];
                        }

                        if (Debug)
                        {
                            msg = String.Format("CorpId: {0}, mf: {1}, chime: {2}, smiles: {3}", CorpId.ToString(), mf.Length, chime.Length, smiles.Length);
                            Log(msg);
                        }

                        pvaCount++;
                    }

                    if (pvaCount >= InsertBufferSize || (!readOk && pvaCount > 0))                     // write if buffer full or at end
                    {
                        try
                        {
                            if (deleteExisting)
                            {
                                int delCount = DoDeletes(CorpIdList);
                                updateCount += delCount;                                 // count deletes as updates
                                insertCount -= delCount;                                 // subtract from inserts
                            }
                            CorpIdList.Clear();

                            insCount = insertCmd.ExecuteArrayNonReader(pva, ref pvaCount);
                            insertCmd.Commit();
                            insertCmd.BeginTransaction();
                            insertCount += insCount;
                        }

                        catch (Exception ex)
                        {
                            throw new Exception(ex.Message, ex);
                        }

                        if (byDateRange)
                        {
                            string checkPointDate2 = String.Format("{0:dd-MMM-yyyy HHmmss}", moleculeDateTime);                             // format date time that will work with oracle
                            UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate2);
                            msg = "Processing where date >= " + checkPointDate + ", Reads: " + readCount + ", Inserts: " + insertCount + ", Updates: " + updateCount;
                        }

                        else if (byCorpIdRange)                         // CorpId range
                        {
                            msg = "Processing: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount;
                        }

                        else if (missingFix)
                        {
                            msg = "Fixing missing smiles, Updates: " + updateCount;
                        }

                        msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures);

                        Progress.Show(msg);
                    }

                    if (!readOk)
                    {
                        break;
                    }

                    readCount++;
                }

                readCmd.Dispose();
                insertCmd.Dispose();
            }             // end for select chunk

            msg  = "UpdateCorpDbMoltableMx - Inserts: " + insertCount + ", Updates: " + updateCount;
            msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures);
            Log(msg);

            return(msg);
        }