/// <summary> /// Copy PRD Annotation Table data to DEV by groups of result ids /// </summary> /// <returns></returns> public static string CopyPrdAnnotationTableDataToDevBy_Rslt_Id() { long minRi = 1; //long minRi = 1310251601; // appro half of rows, allows index on all cols to be built long maxRi = 4331096190; long chunkSize = 100000; DbCommandMx cmd = new DbCommandMx(); cmd.MxConn = DbConnectionMx.GetConnection("DEV857"); cmd.BeginTransaction(); long totalIns = 0; for (long ri = minRi; ri <= maxRi; ri += chunkSize) { string range = (ri + 1).ToString() + " and " + (ri + chunkSize); string sql = @"insert /*+ APPEND */ into mbs_owner.mbs_adw_rslt (select * from mbs_owner.mbs_adw_rslt where rslt_id between " + range + ")"; cmd.PrepareUsingDefinedConnection(sql); int insCnt = cmd.ExecuteNonReader(); cmd.Commit(); totalIns += insCnt; string msg = "Rows copied: " + totalIns + " (" + range + ")"; Mobius.UAL.Progress.Show(msg); DebugLog.Message(msg); } cmd.Dispose(); return("Rows copied: " + totalIns); }
/// <summary> /// Commit transaction /// </summary> public void Commit() { if (DbCmd != null) { DbCmd.Commit(); } }
/// <summary> /// Update copies of CMN_ASSY_ATRBTS in other Oracle instances /// </summary> /// <returns></returns> static string UpdateCmnAssyAtrbtsCopies() { DbCommandMx dao; string sql, msg = "", errMsg = ""; int cnt; string[] dbList = new string[] { //"AssayDB1", //"AssayDB2", //"...", }; foreach (string conName in dbList) { try { dao = new DbCommandMx(); dao.MxConn = DbConnectionMx.GetConnection(conName); sql = "delete from MBS_OWNER.CMN_ASSY_ATRBTS"; dao.PrepareUsingDefinedConnection(sql); cnt = dao.ExecuteNonReader(); sql = @" insert into MBS_OWNER.CMN_ASSY_ATRBTS select * from MBS_OWNER.CMN_ASSY_ATRBTS" ; dao.PrepareUsingDefinedConnection(sql); cnt = dao.ExecuteNonReader(); dao.Commit(); dao.Dispose(); if (msg != "") { msg += ", "; } msg += conName; } catch (Exception ex) { errMsg += "Error updating " + conName + ": " + ex.Message + "\r\n"; } } if (msg != "") { msg = "Updated copies of CMN_ASSY_ATRBTS in: " + msg; } if (errMsg != "") { msg += "\r\n" + errMsg; } return(msg); }
/// <summary> /// /// <summary> /// Copy PRD Annotation Table data to DEV reording and compressing the data by method /// </summary> /// <returns></returns> public static string CopyPrdAnnotationTableDataToDev() { string mvidSql = @" select mthd_vrsn_id, count from ( select mthd_vrsn_id, count(mthd_vrsn_id) count FROM MBS_OWNER.mbs_adw_rslt WHERE mthd_vrsn_id > 0 /* = 708070 */ group by mthd_vrsn_id) order by mthd_vrsn_id "; Mobius.UAL.Progress.Show("Getting list of annotation table mtht_vrsn_ids ..."); DbCommandMx c1 = new DbCommandMx(); c1.Prepare(mvidSql); c1.ExecuteReader(); DbCommandMx cmd = new DbCommandMx(); cmd.MxConn = DbConnectionMx.GetConnection("DEV857"); cmd.BeginTransaction(); long totalIns = 0; int vmIdCnt = 0; while (c1.Read()) { long mvId = c1.GetLong(0); string sql = @"insert /*+ APPEND */ into mbs_owner.mbs_adw_rslt select * from mbs_owner.mbs_adw_rslt where mthd_vrsn_id = " + mvId + @" order by ext_cmpnd_id_nbr, rslt_grp_id, rslt_id"; cmd.PrepareUsingDefinedConnection(sql); int insCnt = cmd.ExecuteNonReader(); cmd.Commit(); totalIns += insCnt; vmIdCnt++; string msg = "Mthd_Vrsn_id: " + mvId + ", Vmids: " + vmIdCnt + ", Rows: " + totalIns; Mobius.UAL.Progress.Show(msg); DebugLog.Message(msg); } c1.Dispose(); cmd.Dispose(); return("Rows copied: " + totalIns); }
/// <summary> /// Update Common Assay Attributes table (mbs_owner.cmn_assy_atrbts) /// /// Command: Update AssayAttributesTable /// /// This command builds an entry (or two) in the cmn_assy_atrbts table for each assay /// referenced in the Mobius contents tree that reports an SP or CRC value as determined /// by the associated metafactory and available in each metatable. /// If the gene target associated with an assay can be identified then information on that /// gene is included as well. /// /// Additional gene information may come from the metadata for a source such as the results /// warehouse. /// /// Note that this function must be run under an account that has access to all restricted data so that it /// can properly see what's available and build the table. /// </summary> /// <param name="lex"></param> /// <returns></returns> public static string UpdateAssayAttributesTable(string args) { MetaTable mt; MetaColumn mc; AssayAttributes aa, aa2; Dictionary <int, int> geneIdCounts = new Dictionary <int, int>(); // genes and counts keyed by entrez gene id Dictionary <int, double> targetMapXDict; // dendogram X coord keyed by gene id Dictionary <int, double> targetMapYDict; // dendogram Y coord keyed by gene id Dictionary <string, int> targetTypeCounts = new Dictionary <string, int>(); // target types and counts Dictionary <int, string> assayTypeDict; Dictionary <int, string> assayModeDict; //UnpivotedAssayResult rr, rr2; List <string> toks; string mtName, tableNamePrefix, key, fileName, msg; bool crcExists, spExists, isSummary; int tableId, step, ri, resultTypeId, assayRowCount; Log = new LogFile(ServicesDirs.LogDir + @"\UpdateCommonAssayAttributes.log"); Log.ResetFile(); // Get list of all of the assays in the tree LogMessage("Accumulating assays..."); HashSet <string> mtNameHash = new HashSet <string>(); foreach (MetaTreeNode mtn0 in MetaTree.Nodes.Values) { if (mtn0.Type != MetaTreeNodeType.MetaTable) { continue; } mtName = mtn0.Target; if (AssayMetaData.IsAssayMetaTableName(mtName)) { mtNameHash.Add(mtName); } } bool debug = false; // set to true to debug with limited list of assays from below if (debug) { mtNameHash.Clear(); //assayHash.Add("ASSAY_1"); //assayHash.Add("ASSAY_2"); //assayHash.Add("ASSAY_3"); } // Get other informatin needed from AssayMetadata LogMessage("Reading AssayMetadata ASSAY metadata..."); Dictionary <int, AssayDbMetadata> assayMetadataAssayDict = // get assays and associated target/gene information AssayMetadataDao.GetAssayTargetGeneData(); LogMessage("Getting AssayMetadata result types..."); Dictionary <int, AssayDbResultType> resultTypeDict = AssayMetadataDao.GetResultTypeDict(); LogMessage("Getting assay types and modes..."); AssayMetadataDao.GetAssayTypesAndModes(out assayTypeDict, out assayModeDict); LogMessage("Getting gene dendogram coordinates..."); try { AssayMetadataDao.GetImageCoords(out targetMapXDict, out targetMapYDict); } catch (Exception ex) // may fail if problem with data source { LogMessage(DebugLog.FormatExceptionMessage(ex, true)); targetMapXDict = new Dictionary <int, double>(); targetMapYDict = new Dictionary <int, double>(); } // Process each assay int metatablesFound = 0, metatablesNotFound = 0; int assaysProcessed = 0; int assaysWithGenes = 0; int assaysWithGeneCoords = 0; int assaysWithTargets = 0; int assaysWithSpOnly = 0; int assaysWithCrcOnly = 0; int assaysWithNoCrcSP = 0; int assaysWithOtherTypes = 0; int assaysWithCrcAndSp = 0; int assaysWithNoKeyTypes = 0; int assaysWithProcessingErrors = 0; Dictionary <string, int> CrcAssayCnt = new Dictionary <string, int>() { { "ASSAY_DB1", 0 }, { "ASSAY_DB2", 0 }, { "ASSAY_DB3", 0 } }; Dictionary <string, int> SpAssayCnt = new Dictionary <string, int>() { { "ASSAY_DB1", 0 }, { "ASSAY_DB2", 0 }, { "ASSAY_DB3", 0 } }; Dictionary <string, int> OtherAssayCnt = new Dictionary <string, int>() { { "ASSAY_DB1", 0 }, { "ASSAY_DB2", 0 }, { "ASSAY_DB3", 0 } }; List <AssayAttributes> resultTypeRows = new List <AssayAttributes>(); List <AssayAttributes> geneRows = new List <AssayAttributes>(); List <AssayAttributes> dbRows = new List <AssayAttributes>(); string copyUpdateMsg = ""; foreach (string mtName0 in mtNameHash) { AssayMetaData assayMetaData = null; // metadata for assay bool isAssay = false; int assayIdNbr = NullValue.NullNumber; int assayId = NullValue.NullNumber; //if (assaysProcessed >= 100) break; // debug mtName = mtName0; MetaTable.ParseMetaTableName(mtName, out tableNamePrefix, out tableId, out isSummary); string resultType = ""; string rtId = ""; string assayName, assayDb; mt = MetaTableCollection.Get(mtName); // get metatable if (mt == null) { metatablesNotFound++; LogMessage("MetaTable not found: " + mtName); continue; } metatablesFound++; if (mt.Code == "") { continue; // must be single pivoted assay } assayDb = "ASSAY_DB"; // customize assayIdNbr = -1; if (UalUtil.IClient != null && UalUtil.IClient.Attended) { UAL.Progress.Show((assaysProcessed + 1).ToString() + " / " + mtNameHash.Count + " - " + mt.Name + "\r\n" + mt.Label); } aa = new AssayAttributes(); aa.AssayDatabase = assayDb; aa.AssayIdNbr = assayIdNbr; // data-source-specific assay Id aa.AssayIdTxt = mt.Name; // store ASSAY_1234 type table name aa.AssayId2 = assayId; // any associated assay id if (isAssay) { aa.AssayName = assayMetaData.Name; // name from AssayMetadata } else { aa.AssayName = MetaTable.RemoveSuffixesFromName(mt.Label); // name from metatable } if (mt.SummarizedExists) { aa.SummarizedAvailable = true; } else { aa.SummarizedAvailable = false; } if (isAssay) { if (assayTypeDict.ContainsKey(tableId)) { aa.AssayType = assayTypeDict[tableId]; } if (assayModeDict.ContainsKey(tableId)) { aa.AssayMode = assayModeDict[tableId]; } aa.AssaySource = AssayMetaData.GetAssaySource(tableId); aa.AssociationSource = "TODO"; // customize } aa.AssayStatus = "Active"; // say all active for now MetaTableStats mts = MetaTableFactory.GetStats(mtName); if (mts != null) { aa.ResultCount = (int)mts.RowCount; aa.AssayUpdateDate = mts.UpdateDateTime; } else { aa.ResultCount = 0; // assume no results if no stats } if (mt.DescriptionIsAvailable()) // use description from Mobius { aa.AssayDesc = "Y"; } if (String.IsNullOrEmpty(aa.GeneFamily)) { aa.GeneFamily = "Unknown"; // set these to "Unknown" rather than null } if (String.IsNullOrEmpty(aa.AssayType)) { aa.AssayType = "UNKNOWN"; // upper case UNKNOWN } if (String.IsNullOrEmpty(aa.AssayMode)) { aa.AssayMode = "UNKNOWN"; // upper case UNKNOWN } // Step1: Add a row for primary & any secondary results resultTypeRows.Clear(); MetaColumn firstResultCol = null, firstKeyResultCol = null, firstOtherKeyResultCol = null; string resultTypeConcType; HashSet <string> keyResultTypeCodes = new HashSet <string>(); int spCnt = 0, crcCnt = 0, otherCnt = 0; for (int mci = 0; mci < mt.MetaColumns.Count; mci++) // pick first col with result code (could also check summarization method) { mc = mt.MetaColumns[mci]; if (Lex.IsUndefined(mc.ResultCode)) { continue; // must have code defined } if (keyResultTypeCodes.Contains(mc.ResultCode)) { continue; // and not included so far } if (mc.InitialSelection != ColumnSelectionEnum.Selected) { continue; // selected only } if (firstResultCol == null) { firstResultCol = mc; } if (!IsKeyResultType(mc, out resultTypeConcType)) { continue; } if (firstKeyResultCol == null) { firstKeyResultCol = mc; } keyResultTypeCodes.Add(mc.ResultCode); aa2 = aa.Clone(); if (resultTypeRows.Count == 0) { aa2.TopLevelResult = "Y"; } else { aa2.TopLevelResult = "N"; } aa2.ResultTypeId2 = GetAssayResultTypeId(mc); // AssayMetadata result type id aa2.ResultTypeIdNbr = GetInternalResultTypeId(mc); // Internal database result type id aa2.ResultTypeIdTxt = mc.Name; // Mobius column name if (isAssay && resultTypeDict.ContainsKey(aa2.ResultTypeId2)) { aa2.ResultName = resultTypeDict[aa2.ResultTypeId2].Name; // use name from AssayMetadata result type dict } else { aa2.ResultName = mc.Label; // use label from Mobius } aa2.ResultTypeUnits = mc.Units; // result units if (Lex.Eq(resultTypeConcType, "SP")) { aa2.ResultTypeConcType = "SP"; spCnt++; } else if (Lex.Eq(resultTypeConcType, "CRC")) { aa2.ResultTypeConcType = "CRC"; crcCnt++; } else { aa2.ResultTypeConcType = ""; otherCnt++; if (firstOtherKeyResultCol == null) { firstOtherKeyResultCol = mc; } } aa2.ResultTypeConcUnits = ""; // todo resultTypeRows.Add(aa2); } if (resultTypeRows.Count >= 1) { if (crcCnt > 0) { CrcAssayCnt[assayDb]++; // count primary type by db } else if (spCnt > 0) { SpAssayCnt[assayDb]++; } else { OtherAssayCnt[assayDb]++; } if (crcCnt > 0 && spCnt == 0) { assaysWithCrcOnly++; // count overall primary/secondary types } else if (crcCnt == 0 && spCnt > 0) { assaysWithSpOnly++; } else if (crcCnt > 0 && spCnt > 0) { assaysWithCrcAndSp++; } if (crcCnt == 0 && spCnt == 0) // no SP or CRC result types { assaysWithNoCrcSP++; mc = firstKeyResultCol; LogMessage("Assay with No SP/CRC key results: " + mt.Name + "." + mc.Name + " (" + mc.ResultCode + "), " + mt.Label + "." + mc.Label); } else if (otherCnt > 0) // no SP or CRC result types { assaysWithOtherTypes++; mc = firstOtherKeyResultCol; LogMessage("Non SP/CRC key result: " + mt.Name + "." + mc.Name + " (" + mc.ResultCode + "), " + mt.Label + "." + mc.Label); } } else // no key result type { aa2 = aa.Clone(); resultTypeRows.Add(aa2); // include row for step1 OtherAssayCnt[assayDb]++; assaysWithNoKeyTypes++; LogMessage("No key result type for metatable: " + mt.Name + ", " + mt.Label); } // Build a step2 row for each target/gene geneRows.Clear(); List <AssayTarget> targets = new List <AssayTarget>(); int geneCount = 0; if (isAssay) { targets = assayMetaData.Targets; } if (targets.Count > 0) { assaysWithTargets++; } foreach (AssayTarget target in targets) { aa = new AssayAttributes(); aa.GeneFamily = target.TargetTypeShortName; // count target type occurance if (Lex.IsUndefined(aa.GeneFamily)) { aa.GeneFamily = "Unknown"; } if (!targetTypeCounts.ContainsKey(aa.GeneFamily)) { targetTypeCounts[aa.GeneFamily] = 0; } targetTypeCounts[aa.GeneFamily]++; if (target.Genes == null || target.Genes.Count == 0) // if no genes add a single target row { geneRows.Add(aa); continue; } foreach (AssayGene rg in target.Genes) { if (!Lex.IsDefined(rg.GeneSymbol)) { continue; } aa2 = aa.Clone(); geneRows.Add(aa2); aa2.GeneSymbol = rg.GeneSymbol; int.TryParse(rg.GeneId, out aa2.GeneId); if (aa2.GeneId > 0 && targetMapXDict.ContainsKey(aa2.GeneId)) { aa2.TargetMapX = targetMapXDict[aa2.GeneId]; aa2.TargetMapY = targetMapYDict[aa2.GeneId]; if (geneCount == 0) { assaysWithGeneCoords++; } } if (!geneIdCounts.ContainsKey(aa2.GeneId)) // count gene occurance { geneIdCounts[aa2.GeneId] = 0; } geneIdCounts[aa2.GeneId]++; if (geneCount == 0) { assaysWithGenes++; } geneCount++; } } if (geneRows.Count == 0) // if no step 2 rows (i.e. no targets), create a single step2 row { aa = new AssayAttributes(); geneRows.Add(aa); } // Combine key result types with target/genes for (int i1 = 0; i1 < resultTypeRows.Count; i1++) { AssayAttributes s1aa = resultTypeRows[i1]; for (int i2 = 0; i2 < geneRows.Count; i2++) { AssayAttributes s2aa = geneRows[i2]; aa = s1aa.Clone(); aa.GeneId = s2aa.GeneId; aa.GeneSymbol = s2aa.GeneSymbol; aa.GeneFamily = s2aa.GeneFamily; aa.TargetMapX = s2aa.TargetMapX; aa.TargetMapY = s2aa.TargetMapY; aa.GeneCount = geneCount; if (i2 > 0) { aa.GeneCount = -geneCount; // negative for other than 1st gene } dbRows.Add(aa); } } assaysProcessed++; } // Update table bool updateTable = true; // set to false for debug if (dbRows.Count <= 0) { LogMessage("No rows in new dataset, table not updated"); } else if (updateTable) { LogMessage("Deleting existing data..."); DbCommandMx dao = new DbCommandMx(); string sql = "delete from mbs_owner.cmn_assy_atrbts"; sql = AssayAttributesDao.AdjustAssayAttrsTableName(sql); dao.Prepare(sql); dao.BeginTransaction(); int delCnt = dao.ExecuteNonReader(); LogMessage("Inserting new data..."); int t0 = TimeOfDay.Milliseconds(); for (ri = 0; ri < dbRows.Count; ri++) { aa = dbRows[ri]; aa.Id = ri + 1; //aa.Id += 10000; // debug if (aa.GeneSymbol != null) { aa.GeneSymbol = aa.GeneSymbol.ToUpper(); // be sure key match cols are upper case } if (aa.GeneFamily != null) { aa.GeneFamily = aa.GeneFamily.ToUpper(); } if (aa.GeneFamilyTargetSymbol != null) { aa.GeneFamilyTargetSymbol = aa.GeneFamilyTargetSymbol.ToUpper(); } if (aa.ResultTypeConcType != null) { aa.ResultTypeConcType = aa.ResultTypeConcType.ToUpper(); } if (aa.AssayType != null) { aa.AssayType = aa.AssayType.ToUpper(); } if (aa.AssayMode != null) { aa.AssayMode = aa.AssayMode.ToUpper(); } AssayAttributesDao.InsertCommonAssayAttributes(aa, dao); if (TimeOfDay.Milliseconds() - t0 > 1000) { //Progress.Show("Inserting new data " + (ri + 1) + "/" + rows.Count + "..."); t0 = TimeOfDay.Milliseconds(); } } dao.Commit(); dao.Dispose(); copyUpdateMsg = UpdateCmnAssyAtrbtsCopies(); } string response = "----------------------------------\r\n" + "Assays processed: " + assaysProcessed + "\r\n" + "Assays with processing errors: " + assaysWithProcessingErrors + "\r\n" + "Rows inserted: " + dbRows.Count + "\r\n" + copyUpdateMsg + "----------------------------------\r\n" + "Assays with CRC only: " + assaysWithCrcOnly + "\r\n" + "Assays with SP only: " + assaysWithSpOnly + "\r\n" + "Assays with CRC and SP: " + assaysWithCrcAndSp + "\r\n" + "Assays with no CRC or SP: " + assaysWithNoCrcSP + "\r\n" + "Assays with non CRC/SP key types: " + assaysWithOtherTypes + "\r\n" + "Assays with no key types: " + assaysWithNoKeyTypes + "\r\n" + "----------------------------------\r\n" + "Assays with targets defined: " + assaysWithTargets + "\r\n" + "Assays with genes defined: " + assaysWithGenes + "\r\n" + "Assays with gene map coordinates: " + assaysWithGeneCoords + "\r\n" + "----------------------------------\r\n" + //"CRC Assays: " + CrcAssayCnt["ASSAY"] + "\r\n" + //"SP Assays: " + SpAssayCnt["ASSAY"] + "\r\n" + //"??? Assays: " + OtherAssayCnt["ASSAY"] + "\r\n" + "----------------------------------"; LogMessage("\r\n" + response); UAL.Progress.Hide(); return(response); }
/// <summary> /// Reorder the rows in an annotation table /// </summary> /// <param name="methodId"></param> /// <returns></returns> public static long ReorderRows(long methodId) { AnnotationDao dao = null; DbCommandMx cmd = null; string sql = null, ttName = null; int insCnt = 0; try { dao = new AnnotationDao(); dao.BeginTransaction(); cmd = dao.DbCmd; ttName = "tt_" + methodId; try // delete just in case { cmd.PrepareAndExecuteNonReader("truncate table " + ttName, logExceptions: false); cmd.PrepareAndExecuteNonReader("drop table " + ttName, logExceptions: false); } catch (Exception ex) { ex = ex; } // Create temp table & copy data for method sql = "create global temporary table " + ttName + @" on commit preserve rows as select* from mbs_owner.mbs_adw_rslt where mthd_vrsn_id = " + methodId; cmd.PrepareAndExecuteNonReader(sql); // Delete existing data sql = @"delete from mbs_owner.mbs_adw_rslt where mthd_vrsn_id = " + methodId; int delCnt = cmd.PrepareAndExecuteNonReader(sql); // Reinsert from temp table in desired order into new blocks (e.g. APPEND hint) sql = @"insert /*+ APPEND */ into mbs_owner.mbs_adw_rslt select* from " + ttName + @" order by ext_cmpnd_id_nbr, rslt_grp_id, rslt_id" ; insCnt = cmd.PrepareAndExecuteNonReader(sql); cmd.Commit(); // commit try // remove temp table { cmd.PrepareAndExecuteNonReader("truncate table " + ttName, logExceptions: false); cmd.PrepareAndExecuteNonReader("drop table " + ttName, logExceptions: false); } catch (Exception ex) { ex = ex; } dao.Dispose(); } catch (Exception ex) { string msg = "Exception for Annotation Table: " + ttName + " "; DebugLog.Message(msg + DebugLog.FormatExceptionMessage(ex)); try { cmd.Rollback(); // rollback if failed cmd.PrepareAndExecuteNonReader("truncate table " + ttName, logExceptions: false); cmd.PrepareAndExecuteNonReader("drop table " + ttName, logExceptions: false); } catch (Exception ex2) { ex2 = ex2; } try { dao.Dispose(); } catch (Exception ex3) { ex3 = ex3; } throw new Exception(msg, ex); } return(insCnt); }
/// <summary> /// Sync the Mobius CorpMoltable replicate used to retrieve Smiles /// Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>] /// </summary> /// <returns></returns> public static string UpdateCorpDbMoltableMx( string args) { DateTime moleculeDateTime = DateTime.MinValue; double mw; string msg = "", sql = "", maxCorpIdSql, mf = "", chime = "", smiles = "", checkPointDate = "", helm = "", sequence = "", svg = ""; object[][] pva = null; int pvaCount = 0, CorpId, lowCorpId = 0, highCorpId = 0, srcMaxCorpId = 0; int SelectChunkSize = 20; // small chunks int InsertBufferSize = 10; //int SelectChunkSize = 100000; // big chunks //int InsertBufferSize = 1000; // Select data from corp_moltable by CorpId range const string SelectByCorpIdRange = @" SELECT m.corp_nbr, chime(m.ctab), m.molformula, m.molweight, null molsmiles, s.helm_txt, s.sequence_txt, m.molecule_date FROM corp_owner.corp_moltable m, corp_owner.corp_substance s where m.corp_nbr > 0 and s.corp_nbr = m.corp_nbr and (s.status_code is null or s.status_code = 'A') ORDER BY corp_nbr"; // Select data from corp_moltable by date range comparing to corp_moltable_mx const string SelectByDateRange = @" select m.corp_nbr, chime(m.ctab), m.molformula, m.molweight, null molsmiles, s.helm_txt, s.sequence_txt, m.molecule_date, m2.molecule_date from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx m2 where m.molecule_date > to_date('1-jan-1900 000000','DD-MON-YYYY HH24MISS') and s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and m2.corp_nbr (+) = m.corp_nbr and m2.molecule_date (+) != m.molecule_date order by m.molecule_date" ; // Select for missing smiles strings, ex: Update CorpDbMoltableMx LoadMissing mx.molecule_date > '1-jan-2014' const string SelectMissingSmilesFix = @" select /* check for CorpIds in corp_moltable not in corp_moltable_mx */ corp_nbr, chime(ctab), molformula, molweight, null molsmiles, helm_txt, sequence_txt, molecule_date from ( select m.*, s.helm_txt, s.sequence_txt, mx.molsmiles molsmiles_mx from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx mx where s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and mx.corp_nbr (+) = m.corp_nbr and 1=1 /* condition to substitute */ ) m where molsmiles_mx is null /* extra condition */ order by corp_nbr" ; // Insert missing helm info const string SelectMissingHelmFix = @" select /* check for CorpIds in corp_moltable not in corp_moltable_mx */ corp_nbr, chime(ctab), molformula, molweight, null molsmiles, helm_txt, sequence_txt, molecule_date from ( select m.*, s.helm_txt, s.sequence_txt, mx.molsmiles molsmiles_mx from corp_owner.corp_moltable m, corp_owner.corp_substance s, corp_moltable_mx mx where s.corp_nbr = M.CORP_NBR and (s.status_code is null or s.status_code = 'A') and mx.corp_nbr (+) = m.corp_nbr and 1=1 /* condition to substitute */ ) m where length(helm_txt) > 0 /* extra condition */ order by corp_nbr" ; // Secondary "large" structure table (~5k mols) const string SelectLargeMols = @" select corp_nbr, to_clob(molstructure), to_clob(molformula), molweight, molsmiles, null helm_txt, null sequence_txt, molecule_date from (select corp_srl_nbr corp_nbr, 'CompoundId=' || corp_srl_nbr molstructure, null ctab, mlclr_frml_txt molformula, mlclr_wgt molweight, null molsmiles, null molecule_date from rdm_owner.rdm_sbstnc where rdw_src_cd = 'LRG'" ; // Insert statement const string InsertSql = @" insert into mbs_owner.corp_moltable_mx ( corp_nbr, molstructure, molformula, molweight, molsmiles, molecule_date) values (:0, :1, :2, :3, :4, :5)" ; // Build select sql bool byDateRange = false, byCorpIdRange = false, missingFix = true, deleteExisting = true; string missingFixCriteria = ""; if (Lex.IsUndefined(args) || Lex.Eq(args, "ByDateRange")) { byDateRange = true; } else if (Lex.Eq(args, "ByCorpIdRange")) { byCorpIdRange = true; Progress.Show("Getting range of CorpIds to insert..."); maxCorpIdSql = "select max(corp_nbr) from corp_owner.corp_moltable"; // get highest CorpId in source db srcMaxCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql); if (srcMaxCorpId < 0) { srcMaxCorpId = 0; } maxCorpIdSql = "select max(corp_nbr) from mbs_owner.corp_moltable_mx"; // get highest CorpId in dest db highCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql); if (highCorpId < 0) { highCorpId = 0; } } else if (Lex.StartsWith(args, "LoadMissing")) { missingFix = true; if (args.Contains(" ")) { missingFixCriteria = args.Substring(10).Trim(); } } else if (int.TryParse(args, out srcMaxCorpId)) // single CorpId { byCorpIdRange = true; highCorpId = srcMaxCorpId - 1; // say 1 less is the max we have } else { return("Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]"); } Log("UpdateCorpDbMoltableMx started: " + args); int readCount = 0, insCount = 0, insertCount = 0, updateCount = 0, undefinedStructures = 0, smilesSuccess = 0, smilesFails = 0, helmStructures = 0; List <string> CorpIdList = new List <string>(); for (int chunk = 1; ; chunk++) // loop over chunks { if (byDateRange) // single chunk { if (chunk > 1) { break; // break 2nd time through } checkPointDate = UserObjectDao.GetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", "01-sep-2013 000000"); //UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate); sql = Lex.Replace(SelectByDateRange, "1-jan-1900 000000", checkPointDate); msg = "Reading where date >= " + checkPointDate; } else if (byCorpIdRange) // by CorpId range { if (highCorpId >= srcMaxCorpId) { break; // done } lowCorpId = highCorpId + 1; // start of next chunk highCorpId = lowCorpId + SelectChunkSize; if (highCorpId >= srcMaxCorpId) { highCorpId = srcMaxCorpId; } sql = Lex.Replace(SelectByCorpIdRange, "corp_nbr > 0", "corp_nbr between " + lowCorpId + " and " + highCorpId); msg = "Reading: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount; } else if (missingFix) { if (chunk > 1) { break; // break 2nd time through } sql = SelectMissingHelmFix; if (Lex.IsDefined(missingFixCriteria)) // substitute any criteria { sql = Lex.Replace(sql, "1=1", missingFixCriteria); } msg = "Fixing missing data"; } Progress.Show(msg); DbCommandMx readCmd = new DbCommandMx(); readCmd.MxConn = DbConnectionMx.GetConnection("prd123"); readCmd.PrepareUsingDefinedConnection(sql, null); DbDataReader rdr = readCmd.ExecuteReader(); DbCommandMx insertCmd = new DbCommandMx(); OracleDbType[] pta = new OracleDbType[6]; pta[0] = OracleDbType.Int32; // corp_nbr pta[1] = OracleDbType.Clob; // molstructure pta[2] = OracleDbType.Clob; // molformula pta[3] = OracleDbType.Double; // molweight pta[4] = OracleDbType.Clob; // smiles pta[5] = OracleDbType.Date; // molecule_date insertCmd.Prepare(InsertSql, pta); insertCmd.BeginTransaction(); // be sure we have a transaction going pva = DbCommandMx.NewObjectArrayArray(6, InsertBufferSize); // alloc insert row array object[] vo = new object[6]; while (true) { bool readOk = rdr.Read(); if (readOk) { rdr.GetValues(vo); CorpId = readCmd.GetInt(0); // corp_nbr vo[0] = CorpId; CorpIdList.Add(CorpId.ToString()); if (!readCmd.IsNull(1)) // molstructure { chime = readCmd.GetClob(1); chime = OracleMx.ClearStringIfExceedsMaxStringSize(chime); vo[1] = chime; } else { chime = ""; } if (!readCmd.IsNull(2)) // molformula { mf = readCmd.GetClob(2); mf = OracleMx.ClearStringIfExceedsMaxStringSize(mf); vo[2] = mf; } if (!readCmd.IsNull(3)) // molweight { mw = readCmd.GetDouble(3); vo[3] = mw; } if (Lex.IsDefined(chime)) // molsmiles - calculate from chime string { MoleculeMx cs = new MoleculeMx(MoleculeFormat.Chime, chime); if (cs.AtomCount > 1) // need more than one atom { MoleculeMx cs2 = cs.ConvertTo(MoleculeFormat.Smiles); smiles = cs2.GetSmilesString(); if (Lex.IsDefined(smiles)) { smilesSuccess++; } else { Log("Smiles conversion failure for CorpId: " + CorpId); smilesFails++; } smiles = OracleMx.ClearStringIfExceedsMaxStringSize(smiles); vo[4] = smiles; } else { undefinedStructures++; } } else { undefinedStructures++; } if (!readCmd.IsNull(5)) { helm = readCmd.GetClob(5); if (Lex.IsDefined(helm)) { svg = HelmControl.GetSvg(helm); vo[1] = SvgUtil.CompressSvgString(svg); // store compressed svg in molstructure column for now helmStructures++; } } if (!readCmd.IsNull(6)) { sequence = readCmd.GetClob(6); if (Lex.IsDefined(sequence)) { // nothing yet } } moleculeDateTime = DateTime.MinValue; if (!readCmd.IsNull(7)) // molecule_date { moleculeDateTime = readCmd.GetDateTime(7); vo[5] = moleculeDateTime; } for (int pi = 0; pi < 6; pi++) // invert for insert { pva[pi][pvaCount] = vo[pi]; } if (Debug) { msg = String.Format("CorpId: {0}, mf: {1}, chime: {2}, smiles: {3}", CorpId.ToString(), mf.Length, chime.Length, smiles.Length); Log(msg); } pvaCount++; } if (pvaCount >= InsertBufferSize || (!readOk && pvaCount > 0)) // write if buffer full or at end { try { if (deleteExisting) { int delCount = DoDeletes(CorpIdList); updateCount += delCount; // count deletes as updates insertCount -= delCount; // subtract from inserts } CorpIdList.Clear(); insCount = insertCmd.ExecuteArrayNonReader(pva, ref pvaCount); insertCmd.Commit(); insertCmd.BeginTransaction(); insertCount += insCount; } catch (Exception ex) { throw new Exception(ex.Message, ex); } if (byDateRange) { string checkPointDate2 = String.Format("{0:dd-MMM-yyyy HHmmss}", moleculeDateTime); // format date time that will work with oracle UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate2); msg = "Processing where date >= " + checkPointDate + ", Reads: " + readCount + ", Inserts: " + insertCount + ", Updates: " + updateCount; } else if (byCorpIdRange) // CorpId range { msg = "Processing: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount; } else if (missingFix) { msg = "Fixing missing smiles, Updates: " + updateCount; } msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures); Progress.Show(msg); } if (!readOk) { break; } readCount++; } readCmd.Dispose(); insertCmd.Dispose(); } // end for select chunk msg = "UpdateCorpDbMoltableMx - Inserts: " + insertCount + ", Updates: " + updateCount; msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures); Log(msg); return(msg); }
/// <summary> /// Load data into PubChem database /// PubChem assay data files are downloaded from the PubChem site: /// http://pubchem.ncbi.nlm.nih.gov/ using a program like SmartFTP. /// The files are in GNU Zip (.gz) format and can be unzipped with /// the following gzip commands: /// c:\gzip\gzip -d c:\pubchem\bioassay\csv\description\*.gz /// c:\gzip\gzip -d c:\pubchem\bioassay\csv\data\*.gz /// After downloading and decompression this method can be called on the files. /// </summary> /// <param name="args"></param> /// <returns></returns> public static string LoadData( string aid) { int recCount = 0; string mtName = "PubChem_aid_" + aid; MetaTable mt = MetaTableCollection.Get(mtName); if (mt == null) { return("Failed to get metatable"); } // if (Math.Sqrt(4) == 2) goto UpdateCids; string fileName = PubChemAssayDirectory + @"\CSV\Data\" + aid + ".csv"; StreamReader sr; try { sr = new StreamReader(fileName); } catch (Exception ex) { return("File not found: " + fileName); } string header = sr.ReadLine(); // read headers line List <string> headers = Csv.SplitCsvString(header); int cidIdx = -1; for (cidIdx = 0; cidIdx < headers.Count; cidIdx++) { if (headers[cidIdx].ToUpper() == "PUBCHEM_CID") { break; } } if (cidIdx >= headers.Count) { sr.Close(); return("PUBCHEM_CID column not found in data headers"); } Dictionary <string, MetaColumn> mcd = new Dictionary <string, MetaColumn>(); foreach (MetaColumn mc2 in mt.MetaColumns) { mcd[mc2.Name.ToUpper()] = mc2; // build dict for quick metacolumn lookup } DbConnectionMx conn = DbConnectionMx.MapSqlToConnection(ref PubChemWarehouseTable); conn.BeginTransaction(); // do multiple updates per transaction GenericDwDao dao = new GenericDwDao( PubChemWarehouseTable, // table for results PubChemWarehouseSeq); // sequence to use dao.BufferInserts(true); // buffer inserts for better speed SequenceDao.SetCacheSize(PubChemWarehouseSeq, 100); // number of ids to cache locally from sequence //string progressMsg = "Deleting existing data..."; int i1 = dao.DeleteTable(Int32.Parse(mt.TableFilterValues[0]), true); //if (Progress.CancelRequested()) //{ // dao.Dispose(); // return "Cancelled during data delete"; //} //Progress.Show("Loading file..."); recCount = 0; int t1 = 0; while (true) { int t2 = TimeOfDay.Milliseconds(); if (t2 - t1 > 1000) { if (Progress.CancelRequested) { dao.ExecuteBufferedInserts(); conn.Commit(); conn.Close(); sr.Close(); Progress.Hide(); return(recCount.ToString() + " rows loaded"); } Progress.Show("Loading file (" + recCount.ToString() + ") ..."); t1 = t2; } string rec = sr.ReadLine(); if (rec == null) { break; } List <string> vals = Csv.SplitCsvString(rec); int cid; try { cid = Int32.Parse(vals[cidIdx]); } // get compound id catch (Exception ex) { string txtCid = vals[cidIdx]; if (txtCid == null) { txtCid = ""; } DebugLog.Message("Load PubChem bad CID " + txtCid + ", AID = " + aid); continue; } long rslt_grp_id = dao.GetNextIdLong(); // id to hold row together for (int vi = 0; vi < vals.Count; vi++) { string s = vals[vi]; if (s == "") { continue; } string[] sa = rec.Split(','); if (vi >= headers.Count) { continue; } string mcName = headers[vi].ToUpper(); if (mcName.Length > 26) { mcName = mcName.Substring(0, 26); // limit length to 26 } if (mcName == "PUBCHEM_CID") { continue; } if (Lex.IsInteger(mcName)) { mcName = "R_" + mcName; // result number } MetaColumn mc = mcd[mcName]; if (mc == null) { continue; } AnnotationVo vo = new AnnotationVo(); vo.rslt_grp_id = rslt_grp_id; if (mc.DataType == MetaColumnType.String) { vo.rslt_val_txt = s; } else if (mc.DataType == MetaColumnType.Number || mc.DataType == MetaColumnType.Integer) { try { vo.rslt_val_nbr = Convert.ToDouble(s); } catch (Exception e) { continue; } // just continue if bad } else if (mc.DataType == MetaColumnType.Date) { s = DateTimeMx.Normalize(s); if (s == null) { continue; } vo.rslt_val_dt = DateTimeMx.NormalizedToDateTime(s); } else if (mc.Name == "PUBCHEM_ACTIVITY_OUTCOME") // activity outcome is a dict value stored as an integer { try { vo.rslt_val_nbr = Convert.ToInt32(s); } catch (Exception e) { continue; } // just continue if bad } else if (mc.DataType == MetaColumnType.Hyperlink || mc.DataType == MetaColumnType.DictionaryId) { vo.rslt_val_txt = s; } else { continue; } vo.ext_cmpnd_id_nbr = cid; vo.ext_cmpnd_id_txt = cid.ToString(); vo.mthd_vrsn_id = Int32.Parse(mt.TableFilterValues[0]); vo.rslt_typ_id = Int32.Parse(mc.PivotValues[0]); vo.chng_op_cd = "I"; vo.chng_usr_id = Security.UserInfo.UserName; dao.Insert(vo); } // end of field loop recCount++; if (recCount % 100 == 0) { // commit after group of updates dao.ExecuteBufferedInserts(); conn.Commit(); conn.BeginTransaction(); // do multiple updates per transaction } } // end of record loop dao.ExecuteBufferedInserts(); conn.Commit(); conn.Close(); dao.Dispose(); sr.Close(); //UpdateCids: // Add any missing CIDs under method 1000000 Progress.Show("Updating CID table..."); string sql = "INSERT INTO " + PubChemWarehouseTable + "(ext_cmpnd_id_nbr,rslt_id,mthd_vrsn_id,rslt_typ_id,rslt_grp_id) " + "SELECT ext_cmpnd_id_nbr, " + PubChemWarehouseSeq + ".NEXTVAL,1000000,0,0 " + "FROM ( " + "SELECT UNIQUE ext_cmpnd_id_nbr " + "FROM " + PubChemWarehouseTable + " r1 " + "WHERE mthd_vrsn_id = " + aid + " " + "AND NOT EXISTS ( " + " SELECT * " + "FROM " + PubChemWarehouseTable + " r2 " + "WHERE mthd_vrsn_id = 1000000 " + "AND r2.ext_cmpnd_id_nbr = r1.ext_cmpnd_id_nbr) " + "and rownum <= 10000)"; DbCommandMx drd = new DbCommandMx(); drd.Prepare(sql); drd.BeginTransaction(); int newCids = 0; while (true) { int addedCids = drd.ExecuteNonReader(); if (addedCids == 0) { break; } newCids += addedCids; drd.Commit(); drd.BeginTransaction(); // do multiple updates per transaction Progress.Show("Updating CID table (" + newCids.ToString() + ")..."); } drd.Dispose(); Progress.Hide(); return(recCount.ToString() + " rows loaded for AID " + aid + " plus " + newCids.ToString() + " new CIDs"); }