Exemple #1
0
        /// <summary>
        /// Begin a transaction
        /// </summary>

        public void BeginTransaction()
        {
            // A connection is considered available only when the command object, its MxConn
            // and MxConn.DbConn are all non-null; otherwise Connect() is called first.
            bool connectionReady =
                DbCmd != null &&
                DbCmd.MxConn != null &&
                DbCmd.MxConn.DbConn != null;

            if (!connectionReady)
            {
                Connect();
            }

            DbCmd.BeginTransaction();
        }
Exemple #2
0
        /// <summary>
        /// Copy PRD Annotation Table data to DEV by groups of result ids
        /// </summary>
        /// <returns></returns>

        public static string CopyPrdAnnotationTableDataToDevBy_Rslt_Id()
        {
            // Copies rows in chunks of chunkSize result ids, committing after every chunk and
            // reporting the running total through Progress and the debug log.
            // Returns a message containing the total number of rows inserted.

            long minRi = 1;
            // Alternative start value used previously: 1310251601 (approximately half of the rows,
            // which allows the index on all columns to be built).
            long maxRi     = 4331096190;
            long chunkSize = 100000;

            long totalIns = 0;

            DbCommandMx cmd = new DbCommandMx();

            try             // make sure the command is disposed even if a chunk fails
            {
                cmd.MxConn = DbConnectionMx.GetConnection("DEV857");
                cmd.BeginTransaction();

                // NOTE(review): each chunk covers (ri + 1) .. (ri + chunkSize) inclusive, so with
                // minRi = 1 the value rslt_id = 1 is never selected and the final chunk can extend
                // past maxRi - confirm this is intended.
                // NOTE(review): as written the statement selects from and inserts into the same
                // table name on the "DEV857" connection - presumably the source was meant to be
                // reached some other way; confirm.
                for (long ri = minRi; ri <= maxRi; ri += chunkSize)
                {
                    string range = (ri + 1).ToString() + " and " + (ri + chunkSize);
                    string sql   =
                        @"insert /*+ APPEND */ into mbs_owner.mbs_adw_rslt 
  (select * from mbs_owner.mbs_adw_rslt
  where rslt_id between " + range + ")";

                    cmd.PrepareUsingDefinedConnection(sql);
                    int insCnt = cmd.ExecuteNonReader();
                    cmd.Commit();                     // commit each chunk separately

                    totalIns += insCnt;

                    string msg = "Rows copied: " + totalIns + " (" + range + ")";
                    Mobius.UAL.Progress.Show(msg);
                    DebugLog.Message(msg);
                }
            }
            finally
            {
                cmd.Dispose();
            }

            return("Rows copied: " + totalIns);
        }
Exemple #3
0
        /// <summary>
        /// Copy PRD Annotation Table data to DEV, reordering and compressing the data by method
        /// </summary>
        /// <returns>Message giving the total number of rows copied</returns>

        public static string CopyPrdAnnotationTableDataToDev()
        {
            // Reads the list of mthd_vrsn_id values (with row counts) and then, for each one,
            // inserts that method version's rows ordered by compound id, result group id and
            // result id, committing after every method version.
            // Returns a message containing the total number of rows inserted.

            string mvidSql = @"
select mthd_vrsn_id, count from 
(
select mthd_vrsn_id, count(mthd_vrsn_id) count 
 FROM MBS_OWNER.mbs_adw_rslt
 WHERE mthd_vrsn_id > 0 /* = 708070 */
 group by mthd_vrsn_id)
order by mthd_vrsn_id
";

            long totalIns = 0;
            int  vmIdCnt  = 0;

            Mobius.UAL.Progress.Show("Getting list of annotation table mtht_vrsn_ids ...");

            DbCommandMx c1  = new DbCommandMx();            // reader over the method version ids
            DbCommandMx cmd = null;                         // executes the per-method inserts

            try             // make sure both commands are disposed even if a step fails
            {
                c1.Prepare(mvidSql);
                c1.ExecuteReader();

                cmd        = new DbCommandMx();
                cmd.MxConn = DbConnectionMx.GetConnection("DEV857");
                cmd.BeginTransaction();

                while (c1.Read())
                {
                    long mvId = c1.GetLong(0);

                    string sql =
                        @"insert /*+ APPEND */ into mbs_owner.mbs_adw_rslt 
  select * from mbs_owner.mbs_adw_rslt
  where mthd_vrsn_id = " + mvId + @" 
  order by ext_cmpnd_id_nbr, rslt_grp_id, rslt_id";

                    cmd.PrepareUsingDefinedConnection(sql);
                    int insCnt = cmd.ExecuteNonReader();
                    cmd.Commit();                     // commit each method version separately

                    totalIns += insCnt;
                    vmIdCnt++;

                    string msg = "Mthd_Vrsn_id: " + mvId + ", Vmids: " + vmIdCnt + ", Rows: " + totalIns;
                    Mobius.UAL.Progress.Show(msg);
                    DebugLog.Message(msg);
                }
            }
            finally
            {
                c1.Dispose();

                if (cmd != null)
                {
                    cmd.Dispose();
                }
            }

            return("Rows copied: " + totalIns);
        }
Exemple #4
0
        /// <summary>
        /// Update Common Assay Attributes table (mbs_owner.cmn_assy_atrbts)
        ///
        /// Command: Update AssayAttributesTable
        ///
        /// This command builds an entry (or two) in the cmn_assy_atrbts table for each assay
        /// referenced in the Mobius contents tree that reports an SP or CRC value as determined
        /// by the associated metafactory and available in each metatable.
        /// If the gene target associated with an assay can be identified then information on that
        /// gene is included as well.
        ///
        /// Additional gene information may come from the metadata for a source such as the results
        /// warehouse.
        ///
        /// Note that this function must be run under an account that has access to all restricted data so that it
        /// can properly see what's available and build the table.
        /// </summary>
        /// <param name="args">Command arguments; not referenced by the method body</param>
        /// <returns></returns>

        public static string UpdateAssayAttributesTable(string args)
        {
            // Overview:
            //  1. Collect the names of all assay metatables referenced in the contents tree.
            //  2. Read supporting metadata (result types, assay types/modes, gene map coordinates).
            //  3. For each metatable build one row per key result type ("step 1") and one row per
            //     target gene ("step 2"), then combine them into the rows to be stored.
            //  4. Replace the contents of the table with the new rows and log/return a summary.
            // The args parameter is not read anywhere in this method.

            MetaTable       mt;
            MetaColumn      mc;
            AssayAttributes aa, aa2;

            Dictionary <int, int>    geneIdCounts = new Dictionary <int, int>();        // genes and counts keyed by entrez gene id
            Dictionary <int, double> targetMapXDict;                                    // dendrogram X coord keyed by gene id
            Dictionary <int, double> targetMapYDict;                                    // dendrogram Y coord keyed by gene id

            Dictionary <string, int> targetTypeCounts = new Dictionary <string, int>(); // target types and counts
            Dictionary <int, string> assayTypeDict;
            Dictionary <int, string> assayModeDict;

            string mtName, tableNamePrefix;
            bool   isSummary;
            int    tableId, ri;

            Log = new LogFile(ServicesDirs.LogDir + @"\UpdateCommonAssayAttributes.log");
            Log.ResetFile();

            // Get list of all of the assays in the tree

            LogMessage("Accumulating assays...");

            HashSet <string> mtNameHash = new HashSet <string>();

            foreach (MetaTreeNode mtn0 in MetaTree.Nodes.Values)
            {
                if (mtn0.Type != MetaTreeNodeType.MetaTable)
                {
                    continue;
                }

                mtName = mtn0.Target;

                if (AssayMetaData.IsAssayMetaTableName(mtName))
                {
                    mtNameHash.Add(mtName);
                }
            }

            bool debug = false;             // set to true to debug with a limited list of assays added below

            if (debug)
            {
                mtNameHash.Clear();

                //mtNameHash.Add("ASSAY_1");
                //mtNameHash.Add("ASSAY_2");
                //mtNameHash.Add("ASSAY_3");
            }

            // Get other information needed from AssayMetadata

            LogMessage("Reading AssayMetadata ASSAY metadata...");
            Dictionary <int, AssayDbMetadata> assayMetadataAssayDict =            // assays and associated target/gene information (not read further below)
                AssayMetadataDao.GetAssayTargetGeneData();

            LogMessage("Getting AssayMetadata result types...");
            Dictionary <int, AssayDbResultType> resultTypeDict = AssayMetadataDao.GetResultTypeDict();

            LogMessage("Getting assay types and modes...");
            AssayMetadataDao.GetAssayTypesAndModes(out assayTypeDict, out assayModeDict);

            LogMessage("Getting gene dendogram coordinates...");
            try
            {
                AssayMetadataDao.GetImageCoords(out targetMapXDict, out targetMapYDict);
            }
            catch (Exception ex)             // may fail if problem with data source; continue without coordinates
            {
                LogMessage(DebugLog.FormatExceptionMessage(ex, true));
                targetMapXDict = new Dictionary <int, double>();
                targetMapYDict = new Dictionary <int, double>();
            }

            // Process each assay

            int metatablesFound = 0, metatablesNotFound = 0;

            int assaysProcessed            = 0;
            int assaysWithGenes            = 0;
            int assaysWithGeneCoords       = 0;
            int assaysWithTargets          = 0;
            int assaysWithSpOnly           = 0;
            int assaysWithCrcOnly          = 0;
            int assaysWithNoCrcSP          = 0;
            int assaysWithOtherTypes       = 0;
            int assaysWithCrcAndSp         = 0;
            int assaysWithNoKeyTypes       = 0;
            int assaysWithProcessingErrors = 0;             // never incremented here; reported as-is in the summary

            // Per-database counts of assays by primary result type. These are maintained but
            // not currently included in the summary text.
            Dictionary <string, int> CrcAssayCnt = new Dictionary <string, int>()
            {
                { "ASSAY_DB1", 0 }, { "ASSAY_DB2", 0 }, { "ASSAY_DB3", 0 }
            };
            Dictionary <string, int> SpAssayCnt = new Dictionary <string, int>()
            {
                { "ASSAY_DB1", 0 }, { "ASSAY_DB2", 0 }, { "ASSAY_DB3", 0 }
            };
            Dictionary <string, int> OtherAssayCnt = new Dictionary <string, int>()
            {
                { "ASSAY_DB1", 0 }, { "ASSAY_DB2", 0 }, { "ASSAY_DB3", 0 }
            };

            List <AssayAttributes> resultTypeRows = new List <AssayAttributes>();
            List <AssayAttributes> geneRows       = new List <AssayAttributes>();
            List <AssayAttributes> dbRows         = new List <AssayAttributes>();

            string copyUpdateMsg = "";

            foreach (string mtName0 in mtNameHash)
            {
                // isAssay is never set to true in this method, so assayMetaData is never
                // dereferenced and the isAssay branches below are currently inactive.
                AssayMetaData assayMetaData = null;                 // metadata for assay
                bool          isAssay       = false;
                int           assayIdNbr    = NullValue.NullNumber;
                int           assayId       = NullValue.NullNumber;

                mtName = mtName0;
                MetaTable.ParseMetaTableName(mtName, out tableNamePrefix, out tableId, out isSummary);

                string assayDb;

                mt = MetaTableCollection.Get(mtName);                 // get metatable
                if (mt == null)
                {
                    metatablesNotFound++;
                    LogMessage("MetaTable not found: " + mtName);
                    continue;
                }

                metatablesFound++;

                if (mt.Code == "")
                {
                    continue;                         // must be single pivoted assay
                }

                assayDb    = "ASSAY_DB";              // customize
                assayIdNbr = -1;

                if (UalUtil.IClient != null && UalUtil.IClient.Attended)
                {
                    UAL.Progress.Show((assaysProcessed + 1).ToString() + " / " + mtNameHash.Count + " - " + mt.Name + "\r\n" + mt.Label);
                }

                // Base attribute row shared by every row generated for this assay

                aa = new AssayAttributes();
                aa.AssayDatabase = assayDb;
                aa.AssayIdNbr    = assayIdNbr;         // data-source-specific assay Id
                aa.AssayIdTxt    = mt.Name;            // store ASSAY_1234 type table name
                aa.AssayId2      = assayId;            // any associated assay id

                if (isAssay)
                {
                    aa.AssayName = assayMetaData.Name;                     // name from AssayMetadata
                }
                else
                {
                    aa.AssayName = MetaTable.RemoveSuffixesFromName(mt.Label);                  // name from metatable
                }

                aa.SummarizedAvailable = mt.SummarizedExists;

                if (isAssay)
                {
                    if (assayTypeDict.ContainsKey(tableId))
                    {
                        aa.AssayType = assayTypeDict[tableId];
                    }

                    if (assayModeDict.ContainsKey(tableId))
                    {
                        aa.AssayMode = assayModeDict[tableId];
                    }

                    aa.AssaySource       = AssayMetaData.GetAssaySource(tableId);
                    aa.AssociationSource = "TODO";                     // customize
                }

                aa.AssayStatus = "Active";                 // say all active for now

                MetaTableStats mts = MetaTableFactory.GetStats(mtName);
                if (mts != null)
                {
                    aa.ResultCount     = (int)mts.RowCount;
                    aa.AssayUpdateDate = mts.UpdateDateTime;
                }
                else
                {
                    aa.ResultCount = 0;                  // assume no results if no stats
                }

                if (mt.DescriptionIsAvailable())         // use description from Mobius
                {
                    aa.AssayDesc = "Y";
                }

                if (String.IsNullOrEmpty(aa.GeneFamily))
                {
                    aa.GeneFamily = "Unknown";                            // set these to "Unknown" rather than null
                }
                if (String.IsNullOrEmpty(aa.AssayType))
                {
                    aa.AssayType = "UNKNOWN";                             // upper case UNKNOWN
                }
                if (String.IsNullOrEmpty(aa.AssayMode))
                {
                    aa.AssayMode = "UNKNOWN";                             // upper case UNKNOWN
                }

                // Step1: Add a row for primary & any secondary results

                resultTypeRows.Clear();
                MetaColumn       firstResultCol = null, firstKeyResultCol = null, firstOtherKeyResultCol = null;
                string           resultTypeConcType;
                HashSet <string> keyResultTypeCodes = new HashSet <string>();
                int spCnt = 0, crcCnt = 0, otherCnt = 0;

                for (int mci = 0; mci < mt.MetaColumns.Count; mci++)                 // scan columns for key result types
                {
                    mc = mt.MetaColumns[mci];

                    if (Lex.IsUndefined(mc.ResultCode))
                    {
                        continue;                                                    // must have code defined
                    }
                    if (keyResultTypeCodes.Contains(mc.ResultCode))
                    {
                        continue;                                                    // and not included so far
                    }
                    if (mc.InitialSelection != ColumnSelectionEnum.Selected)
                    {
                        continue;                                                    // selected only
                    }
                    if (firstResultCol == null)
                    {
                        firstResultCol = mc;
                    }

                    if (!IsKeyResultType(mc, out resultTypeConcType))
                    {
                        continue;
                    }

                    if (firstKeyResultCol == null)
                    {
                        firstKeyResultCol = mc;
                    }

                    keyResultTypeCodes.Add(mc.ResultCode);

                    aa2 = aa.Clone();

                    // Only the first key result row for the assay is flagged as top level
                    aa2.TopLevelResult = (resultTypeRows.Count == 0) ? "Y" : "N";

                    aa2.ResultTypeId2   = GetAssayResultTypeId(mc);    // AssayMetadata result type id
                    aa2.ResultTypeIdNbr = GetInternalResultTypeId(mc); // Internal database result type id
                    aa2.ResultTypeIdTxt = mc.Name;                     // Mobius column name

                    if (isAssay && resultTypeDict.ContainsKey(aa2.ResultTypeId2))
                    {
                        aa2.ResultName = resultTypeDict[aa2.ResultTypeId2].Name;     // use name from AssayMetadata result type dict
                    }
                    else
                    {
                        aa2.ResultName = mc.Label;                      // use label from Mobius
                    }

                    aa2.ResultTypeUnits = mc.Units;                     // result units

                    if (Lex.Eq(resultTypeConcType, "SP"))
                    {
                        aa2.ResultTypeConcType = "SP";
                        spCnt++;
                    }
                    else if (Lex.Eq(resultTypeConcType, "CRC"))
                    {
                        aa2.ResultTypeConcType = "CRC";
                        crcCnt++;
                    }
                    else
                    {
                        aa2.ResultTypeConcType = "";
                        otherCnt++;
                        if (firstOtherKeyResultCol == null)
                        {
                            firstOtherKeyResultCol = mc;
                        }
                    }

                    aa2.ResultTypeConcUnits = "";                     // todo

                    resultTypeRows.Add(aa2);
                }

                if (resultTypeRows.Count >= 1)
                {
                    // Count primary type by db. The increment tolerates database names that were
                    // not pre-seeded in the counter dictionaries (a plain indexer ++ would throw
                    // KeyNotFoundException for e.g. "ASSAY_DB").
                    if (crcCnt > 0)
                    {
                        IncrementAssayDbCount(CrcAssayCnt, assayDb);
                    }
                    else if (spCnt > 0)
                    {
                        IncrementAssayDbCount(SpAssayCnt, assayDb);
                    }
                    else
                    {
                        IncrementAssayDbCount(OtherAssayCnt, assayDb);
                    }

                    // Count overall primary/secondary types
                    if (crcCnt > 0 && spCnt == 0)
                    {
                        assaysWithCrcOnly++;
                    }
                    else if (crcCnt == 0 && spCnt > 0)
                    {
                        assaysWithSpOnly++;
                    }
                    else if (crcCnt > 0 && spCnt > 0)
                    {
                        assaysWithCrcAndSp++;
                    }

                    if (crcCnt == 0 && spCnt == 0)                     // no SP or CRC result types
                    {
                        assaysWithNoCrcSP++;
                        mc = firstKeyResultCol;                        // non-null here: a row is only added after it is set
                        LogMessage("Assay with No SP/CRC key results: " + mt.Name + "." + mc.Name + " (" + mc.ResultCode + "), " + mt.Label + "." + mc.Label);
                    }
                    else if (otherCnt > 0)                             // SP and/or CRC present plus at least one other key type
                    {
                        assaysWithOtherTypes++;
                        mc = firstOtherKeyResultCol;                   // non-null here: set when otherCnt was first incremented
                        LogMessage("Non SP/CRC key result: " + mt.Name + "." + mc.Name + " (" + mc.ResultCode + "), " + mt.Label + "." + mc.Label);
                    }
                }
                else                 // no key result type
                {
                    aa2 = aa.Clone();
                    resultTypeRows.Add(aa2);                     // include row for step1

                    IncrementAssayDbCount(OtherAssayCnt, assayDb);
                    assaysWithNoKeyTypes++;
                    LogMessage("No key result type for metatable: " + mt.Name + ", " + mt.Label);
                }

                // Build a step2 row for each target/gene

                geneRows.Clear();
                List <AssayTarget> targets = new List <AssayTarget>();
                int geneCount = 0;

                if (isAssay)
                {
                    targets = assayMetaData.Targets;
                }
                if (targets.Count > 0)
                {
                    assaysWithTargets++;
                }

                foreach (AssayTarget target in targets)
                {
                    aa            = new AssayAttributes();
                    aa.GeneFamily = target.TargetTypeShortName;
                    if (Lex.IsUndefined(aa.GeneFamily))
                    {
                        aa.GeneFamily = "Unknown";
                    }

                    if (!targetTypeCounts.ContainsKey(aa.GeneFamily))                // count target type occurrence
                    {
                        targetTypeCounts[aa.GeneFamily] = 0;
                    }
                    targetTypeCounts[aa.GeneFamily]++;

                    if (target.Genes == null || target.Genes.Count == 0)             // if no genes add a single target row
                    {
                        geneRows.Add(aa);
                        continue;
                    }

                    foreach (AssayGene rg in target.Genes)
                    {
                        if (!Lex.IsDefined(rg.GeneSymbol))
                        {
                            continue;
                        }

                        aa2 = aa.Clone();
                        geneRows.Add(aa2);

                        aa2.GeneSymbol = rg.GeneSymbol;
                        int.TryParse(rg.GeneId, out aa2.GeneId);                     // GeneId is left at 0 when the text is not an integer

                        if (aa2.GeneId > 0 && targetMapXDict.ContainsKey(aa2.GeneId))
                        {
                            aa2.TargetMapX = targetMapXDict[aa2.GeneId];
                            aa2.TargetMapY = targetMapYDict[aa2.GeneId];
                            if (geneCount == 0)
                            {
                                assaysWithGeneCoords++;
                            }
                        }

                        if (!geneIdCounts.ContainsKey(aa2.GeneId))                   // count gene occurrence
                        {
                            geneIdCounts[aa2.GeneId] = 0;
                        }
                        geneIdCounts[aa2.GeneId]++;

                        if (geneCount == 0)
                        {
                            assaysWithGenes++;
                        }

                        geneCount++;
                    }
                }

                if (geneRows.Count == 0)                 // if no step 2 rows (i.e. no targets), create a single step2 row
                {
                    aa = new AssayAttributes();
                    geneRows.Add(aa);
                }

                // Combine key result types with target/genes

                for (int i1 = 0; i1 < resultTypeRows.Count; i1++)
                {
                    AssayAttributes s1aa = resultTypeRows[i1];
                    for (int i2 = 0; i2 < geneRows.Count; i2++)
                    {
                        AssayAttributes s2aa = geneRows[i2];

                        aa            = s1aa.Clone();
                        aa.GeneId     = s2aa.GeneId;
                        aa.GeneSymbol = s2aa.GeneSymbol;
                        aa.GeneFamily = s2aa.GeneFamily;

                        aa.TargetMapX = s2aa.TargetMapX;
                        aa.TargetMapY = s2aa.TargetMapY;

                        aa.GeneCount = (i2 > 0) ? -geneCount : geneCount;            // negative for other than 1st gene

                        dbRows.Add(aa);
                    }
                }

                assaysProcessed++;
            }

            // Update table

            bool updateTable = true;             // set to false for debug

            if (dbRows.Count <= 0)
            {
                LogMessage("No rows in new dataset, table not updated");
            }
            else if (updateTable)
            {
                LogMessage("Deleting existing data...");
                DbCommandMx dao = new DbCommandMx();

                try                 // make sure the command is disposed even if the delete or an insert fails
                {
                    string sql = "delete from mbs_owner.cmn_assy_atrbts";
                    sql = AssayAttributesDao.AdjustAssayAttrsTableName(sql);

                    dao.Prepare(sql);
                    dao.BeginTransaction();
                    dao.ExecuteNonReader();

                    LogMessage("Inserting new data...");
                    int t0 = TimeOfDay.Milliseconds();
                    for (ri = 0; ri < dbRows.Count; ri++)
                    {
                        aa    = dbRows[ri];
                        aa.Id = ri + 1;                  // ids are assigned sequentially starting at 1

                        // Be sure key match cols are upper case

                        if (aa.GeneSymbol != null)
                        {
                            aa.GeneSymbol = aa.GeneSymbol.ToUpper();
                        }
                        if (aa.GeneFamily != null)
                        {
                            aa.GeneFamily = aa.GeneFamily.ToUpper();
                        }
                        if (aa.GeneFamilyTargetSymbol != null)
                        {
                            aa.GeneFamilyTargetSymbol = aa.GeneFamilyTargetSymbol.ToUpper();
                        }
                        if (aa.ResultTypeConcType != null)
                        {
                            aa.ResultTypeConcType = aa.ResultTypeConcType.ToUpper();
                        }
                        if (aa.AssayType != null)
                        {
                            aa.AssayType = aa.AssayType.ToUpper();
                        }
                        if (aa.AssayMode != null)
                        {
                            aa.AssayMode = aa.AssayMode.ToUpper();
                        }

                        AssayAttributesDao.InsertCommonAssayAttributes(aa, dao);

                        if (TimeOfDay.Milliseconds() - t0 > 1000)                    // once-per-second hook; progress display is disabled
                        {
                            //Progress.Show("Inserting new data " + (ri + 1) + "/" + dbRows.Count + "...");
                            t0 = TimeOfDay.Milliseconds();
                        }
                    }

                    dao.Commit();
                }
                finally
                {
                    dao.Dispose();
                }

                copyUpdateMsg = UpdateCmnAssyAtrbtsCopies();
            }

            // Summary (the per-database CRC/SP/other counters are not reported)

            string response =
                "----------------------------------\r\n" +
                "Assays processed: " + assaysProcessed + "\r\n" +
                "Assays with processing errors: " + assaysWithProcessingErrors + "\r\n" +
                "Rows inserted: " + dbRows.Count + "\r\n" +
                copyUpdateMsg +
                "----------------------------------\r\n" +
                "Assays with CRC only: " + assaysWithCrcOnly + "\r\n" +
                "Assays with SP only: " + assaysWithSpOnly + "\r\n" +
                "Assays with CRC and SP: " + assaysWithCrcAndSp + "\r\n" +
                "Assays with no CRC or SP: " + assaysWithNoCrcSP + "\r\n" +
                "Assays with non CRC/SP key types: " + assaysWithOtherTypes + "\r\n" +
                "Assays with no key types: " + assaysWithNoKeyTypes + "\r\n" +
                "----------------------------------\r\n" +
                "Assays with targets defined: " + assaysWithTargets + "\r\n" +
                "Assays with genes defined: " + assaysWithGenes + "\r\n" +
                "Assays with gene map coordinates: " + assaysWithGeneCoords + "\r\n" +
                "----------------------------------\r\n" +
                "----------------------------------";

            LogMessage("\r\n" + response);

            UAL.Progress.Hide();
            return(response);
        }

        /// <summary>
        /// Add one to the count stored for an assay database name, starting from zero
        /// when the name has no entry yet.
        /// </summary>
        /// <param name="counts">Counter dictionary keyed by assay database name</param>
        /// <param name="assayDb">Assay database name whose count is to be incremented</param>

        private static void IncrementAssayDbCount(Dictionary <string, int> counts, string assayDb)
        {
            int current;
            counts.TryGetValue(assayDb, out current);             // current is 0 when the key is absent
            counts[assayDb] = current + 1;
        }
Exemple #5
0
        /// <summary>
        /// Sync the Mobius CorpMoltable replicate used to retrieve Smiles
        /// Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]
        /// </summary>
        /// <returns></returns>

        public static string UpdateCorpDbMoltableMx(
            string args)
        {
            DateTime moleculeDateTime = DateTime.MinValue;
            double   mw;
            string   msg = "", sql = "", maxCorpIdSql, mf = "", chime = "", smiles = "", checkPointDate = "", helm = "", sequence = "", svg = "";

            object[][] pva = null;
            int        pvaCount = 0, CorpId, lowCorpId = 0, highCorpId = 0, srcMaxCorpId = 0;

            int SelectChunkSize  = 20;            // small chunks
            int InsertBufferSize = 10;

            //int SelectChunkSize = 100000; // big chunks
            //int InsertBufferSize = 1000;

            // Select data from corp_moltable by CorpId range

            const string SelectByCorpIdRange = @" 
		SELECT 
        m.corp_nbr,
        chime(m.ctab), 
        m.molformula,
        m.molweight,
        null molsmiles,
        s.helm_txt,
        s.sequence_txt,
        m.molecule_date
    FROM 
        corp_owner.corp_moltable m,
        corp_owner.corp_substance s
    where 
        m.corp_nbr > 0
        and s.corp_nbr = m.corp_nbr
        and (s.status_code is null or s.status_code = 'A')    
    ORDER BY corp_nbr";

            // Select data from corp_moltable by date range comparing to corp_moltable_mx

            const string SelectByDateRange = @"
			select
					m.corp_nbr,
					chime(m.ctab), 
					m.molformula,
					m.molweight,
          null molsmiles,
	        s.helm_txt,
					s.sequence_txt,
					m.molecule_date,
					m2.molecule_date
			from
					corp_owner.corp_moltable m,
					corp_owner.corp_substance s,
					corp_moltable_mx m2
			where
					m.molecule_date > to_date('1-jan-1900 000000','DD-MON-YYYY HH24MISS')
					and s.corp_nbr = M.CORP_NBR
					and (s.status_code is null or s.status_code = 'A')    
					and m2.corp_nbr (+) = m.corp_nbr
					and m2.molecule_date (+) != m.molecule_date
			order by m.molecule_date"            ;

            // Select for missing smiles strings, ex: Update CorpDbMoltableMx LoadMissing mx.molecule_date > '1-jan-2014'

            const string SelectMissingSmilesFix = @"
			select /* check for CorpIds in corp_moltable not in corp_moltable_mx */
					corp_nbr,
					chime(ctab), 
					molformula,
					molweight,
					null molsmiles,
					helm_txt,
          sequence_txt,
          molecule_date
			from 
					(
					select 
						m.*, 
						s.helm_txt,
						s.sequence_txt,
						mx.molsmiles molsmiles_mx
					from
					 corp_owner.corp_moltable m,
					 corp_owner.corp_substance s,
					 corp_moltable_mx mx
					where
					 s.corp_nbr = M.CORP_NBR
					 and (s.status_code is null or s.status_code = 'A')
					 and mx.corp_nbr (+) = m.corp_nbr
					 and 1=1 /* condition to substitute */
					) m
			where molsmiles_mx is null /* extra condition */
			order by corp_nbr"            ;

// Insert missing helm info

            const string SelectMissingHelmFix = @"
			select /* check for CorpIds in corp_moltable not in corp_moltable_mx */
					corp_nbr,
					chime(ctab), 
					molformula,
					molweight,
					null molsmiles,
					helm_txt,
          sequence_txt,
          molecule_date
			from 
					(
					select 
						m.*, 
						s.helm_txt,
						s.sequence_txt,
						mx.molsmiles molsmiles_mx
					from
					 corp_owner.corp_moltable m,
					 corp_owner.corp_substance s,
					 corp_moltable_mx mx
					where
					 s.corp_nbr = M.CORP_NBR
					 and (s.status_code is null or s.status_code = 'A')
					 and mx.corp_nbr (+) = m.corp_nbr
					 and 1=1 /* condition to substitute */
					) m
			where length(helm_txt) > 0 /* extra condition */
			order by corp_nbr"            ;

            // Secondary "large" structure table (~5k mols)

            const string SelectLargeMols = @"
			select 
				corp_nbr, 
				to_clob(molstructure), 
				to_clob(molformula), 
				molweight,
				molsmiles,
				null helm_txt,
				null sequence_txt,
				molecule_date
			from
			(select
				corp_srl_nbr corp_nbr,
				'CompoundId=' || corp_srl_nbr molstructure, 
				null ctab,
				mlclr_frml_txt molformula,
				mlclr_wgt molweight,
				null molsmiles,
				null molecule_date
				from rdm_owner.rdm_sbstnc 
				where rdw_src_cd = 'LRG'"                ;

// Insert statement

            const string InsertSql = @"
			insert into mbs_owner.corp_moltable_mx (
				corp_nbr,
				molstructure,
				molformula,
				molweight,
				molsmiles,
				molecule_date)
			values (:0, :1, :2, :3, :4, :5)"            ;

// Build select sql

            bool   byDateRange = false, byCorpIdRange = false, missingFix = true, deleteExisting = true;
            string missingFixCriteria = "";

            if (Lex.IsUndefined(args) || Lex.Eq(args, "ByDateRange"))
            {
                byDateRange = true;
            }

            else if (Lex.Eq(args, "ByCorpIdRange"))
            {
                byCorpIdRange = true;

                Progress.Show("Getting range of CorpIds to insert...");
                maxCorpIdSql = "select max(corp_nbr) from corp_owner.corp_moltable";                 // get highest CorpId in source db
                srcMaxCorpId = SelectSingleValueDao.SelectInt(maxCorpIdSql);
                if (srcMaxCorpId < 0)
                {
                    srcMaxCorpId = 0;
                }

                maxCorpIdSql = "select max(corp_nbr) from mbs_owner.corp_moltable_mx";                 // get highest CorpId in dest db
                highCorpId   = SelectSingleValueDao.SelectInt(maxCorpIdSql);
                if (highCorpId < 0)
                {
                    highCorpId = 0;
                }
            }

            else if (Lex.StartsWith(args, "LoadMissing"))
            {
                missingFix = true;
                if (args.Contains(" "))
                {
                    missingFixCriteria = args.Substring(10).Trim();
                }
            }

            else if (int.TryParse(args, out srcMaxCorpId))             // single CorpId
            {
                byCorpIdRange = true;
                highCorpId    = srcMaxCorpId - 1;              // say 1 less is the max we have
            }

            else
            {
                return("Syntax: UpdateCorpDbMoltableMx [ ByDateRange | ByCorpIdRange | LoadMissing | <singleCorpId>]");
            }

            Log("UpdateCorpDbMoltableMx started: " + args);

            int           readCount = 0, insCount = 0, insertCount = 0, updateCount = 0, undefinedStructures = 0, smilesSuccess = 0, smilesFails = 0, helmStructures = 0;
            List <string> CorpIdList = new List <string>();

            for (int chunk = 1; ; chunk++)       // loop over chunks
            {
                if (byDateRange)                 // single chunk
                {
                    if (chunk > 1)
                    {
                        break;                                // break 2nd time through
                    }
                    checkPointDate = UserObjectDao.GetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", "01-sep-2013 000000");

                    //UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate);

                    sql = Lex.Replace(SelectByDateRange, "1-jan-1900 000000", checkPointDate);

                    msg = "Reading where date >= " + checkPointDate;
                }

                else if (byCorpIdRange)                 // by CorpId range
                {
                    if (highCorpId >= srcMaxCorpId)
                    {
                        break;                                      // done
                    }
                    lowCorpId  = highCorpId + 1;                    // start of next chunk
                    highCorpId = lowCorpId + SelectChunkSize;
                    if (highCorpId >= srcMaxCorpId)
                    {
                        highCorpId = srcMaxCorpId;
                    }
                    sql = Lex.Replace(SelectByCorpIdRange, "corp_nbr > 0", "corp_nbr between " + lowCorpId + " and " + highCorpId);

                    msg = "Reading: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount;
                }

                else if (missingFix)
                {
                    if (chunk > 1)
                    {
                        break;                                // break 2nd time through
                    }
                    sql = SelectMissingHelmFix;
                    if (Lex.IsDefined(missingFixCriteria))                     // substitute any criteria
                    {
                        sql = Lex.Replace(sql, "1=1", missingFixCriteria);
                    }
                    msg = "Fixing missing data";
                }

                Progress.Show(msg);

                DbCommandMx readCmd = new DbCommandMx();
                readCmd.MxConn = DbConnectionMx.GetConnection("prd123");
                readCmd.PrepareUsingDefinedConnection(sql, null);
                DbDataReader rdr = readCmd.ExecuteReader();

                DbCommandMx insertCmd = new DbCommandMx();

                OracleDbType[] pta = new OracleDbType[6];
                pta[0] = OracleDbType.Int32;                // corp_nbr
                pta[1] = OracleDbType.Clob;                 // molstructure
                pta[2] = OracleDbType.Clob;                 // molformula
                pta[3] = OracleDbType.Double;               // molweight
                pta[4] = OracleDbType.Clob;                 // smiles
                pta[5] = OracleDbType.Date;                 // molecule_date

                insertCmd.Prepare(InsertSql, pta);
                insertCmd.BeginTransaction();                               // be sure we have a transaction going

                pva = DbCommandMx.NewObjectArrayArray(6, InsertBufferSize); // alloc insert row array
                object[] vo = new object[6];

                while (true)
                {
                    bool readOk = rdr.Read();

                    if (readOk)
                    {
                        rdr.GetValues(vo);

                        CorpId = readCmd.GetInt(0);                         // corp_nbr
                        vo[0]  = CorpId;
                        CorpIdList.Add(CorpId.ToString());

                        if (!readCmd.IsNull(1))                         // molstructure
                        {
                            chime = readCmd.GetClob(1);
                            chime = OracleMx.ClearStringIfExceedsMaxStringSize(chime);
                            vo[1] = chime;
                        }
                        else
                        {
                            chime = "";
                        }

                        if (!readCmd.IsNull(2))                         // molformula
                        {
                            mf    = readCmd.GetClob(2);
                            mf    = OracleMx.ClearStringIfExceedsMaxStringSize(mf);
                            vo[2] = mf;
                        }

                        if (!readCmd.IsNull(3))                         // molweight
                        {
                            mw    = readCmd.GetDouble(3);
                            vo[3] = mw;
                        }

                        if (Lex.IsDefined(chime))                         // molsmiles - calculate from chime string
                        {
                            MoleculeMx cs = new MoleculeMx(MoleculeFormat.Chime, chime);
                            if (cs.AtomCount > 1)                             // need more than one atom
                            {
                                MoleculeMx cs2 = cs.ConvertTo(MoleculeFormat.Smiles);
                                smiles = cs2.GetSmilesString();
                                if (Lex.IsDefined(smiles))
                                {
                                    smilesSuccess++;
                                }
                                else
                                {
                                    Log("Smiles conversion failure for CorpId: " + CorpId);
                                    smilesFails++;
                                }
                                smiles = OracleMx.ClearStringIfExceedsMaxStringSize(smiles);

                                vo[4] = smiles;
                            }
                            else
                            {
                                undefinedStructures++;
                            }
                        }
                        else
                        {
                            undefinedStructures++;
                        }

                        if (!readCmd.IsNull(5))
                        {
                            helm = readCmd.GetClob(5);
                            if (Lex.IsDefined(helm))
                            {
                                svg   = HelmControl.GetSvg(helm);
                                vo[1] = SvgUtil.CompressSvgString(svg);                                 // store compressed svg in molstructure column for now
                                helmStructures++;
                            }
                        }

                        if (!readCmd.IsNull(6))
                        {
                            sequence = readCmd.GetClob(6);
                            if (Lex.IsDefined(sequence))
                            {
                                // nothing yet
                            }
                        }

                        moleculeDateTime = DateTime.MinValue;
                        if (!readCmd.IsNull(7))                         // molecule_date
                        {
                            moleculeDateTime = readCmd.GetDateTime(7);
                            vo[5]            = moleculeDateTime;
                        }

                        for (int pi = 0; pi < 6; pi++)                         // invert for insert
                        {
                            pva[pi][pvaCount] = vo[pi];
                        }

                        if (Debug)
                        {
                            msg = String.Format("CorpId: {0}, mf: {1}, chime: {2}, smiles: {3}", CorpId.ToString(), mf.Length, chime.Length, smiles.Length);
                            Log(msg);
                        }

                        pvaCount++;
                    }

                    if (pvaCount >= InsertBufferSize || (!readOk && pvaCount > 0))                     // write if buffer full or at end
                    {
                        try
                        {
                            if (deleteExisting)
                            {
                                int delCount = DoDeletes(CorpIdList);
                                updateCount += delCount;                                 // count deletes as updates
                                insertCount -= delCount;                                 // subtract from inserts
                            }
                            CorpIdList.Clear();

                            insCount = insertCmd.ExecuteArrayNonReader(pva, ref pvaCount);
                            insertCmd.Commit();
                            insertCmd.BeginTransaction();
                            insertCount += insCount;
                        }

                        catch (Exception ex)
                        {
                            throw new Exception(ex.Message, ex);
                        }

                        if (byDateRange)
                        {
                            string checkPointDate2 = String.Format("{0:dd-MMM-yyyy HHmmss}", moleculeDateTime);                             // format date time that will work with oracle
                            UserObjectDao.SetUserParameter("MOBIUS", "UpdateCorpDbMoltableMxCheckpointDate", checkPointDate2);
                            msg = "Processing where date >= " + checkPointDate + ", Reads: " + readCount + ", Inserts: " + insertCount + ", Updates: " + updateCount;
                        }

                        else if (byCorpIdRange)                         // CorpId range
                        {
                            msg = "Processing: " + lowCorpId + " to " + highCorpId + ", Reads: " + readCount + ", Inserts: " + insertCount;
                        }

                        else if (missingFix)
                        {
                            msg = "Fixing missing smiles, Updates: " + updateCount;
                        }

                        msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures);

                        Progress.Show(msg);
                    }

                    if (!readOk)
                    {
                        break;
                    }

                    readCount++;
                }

                readCmd.Dispose();
                insertCmd.Dispose();
            }             // end for select chunk

            msg  = "UpdateCorpDbMoltableMx - Inserts: " + insertCount + ", Updates: " + updateCount;
            msg += String.Format(", Undefined structures: {0} , Smiles failures: {1}, Helms: {2}", undefinedStructures, smilesFails, helmStructures);
            Log(msg);

            return(msg);
        }
Exemple #6
0
/// <summary>
/// Load data into PubChem database
/// PubChem assay data files are downloaded from the PubChem site:
/// http://pubchem.ncbi.nlm.nih.gov/ using a program like SmartFTP.
/// The files are in GNU Zip (.gz) format and can be unzipped with
/// the following gzip commands:
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\description\*.gz
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\data\*.gz
/// After downloading and decompression this method can be called on the files.
/// The method deletes the existing rows for the assay's metatable, inserts one
/// result row per non-empty, recognized CSV field (committing every 100 records)
/// and finally adds any compound ids not yet listed under method 1000000.
/// </summary>
/// <param name="aid">Assay id; selects metatable "PubChem_aid_" + aid and data file aid + ".csv"</param>
/// <returns>Status message: a row-count summary on success or cancel, otherwise a short failure description</returns>

        public static string LoadData(
            string aid)
        {
            int recCount = 0;

            string    mtName = "PubChem_aid_" + aid;
            MetaTable mt     = MetaTableCollection.Get(mtName);

            if (mt == null)
            {
                return("Failed to get metatable");
            }

            string       fileName = PubChemAssayDirectory + @"\CSV\Data\" + aid + ".csv";
            StreamReader sr;

            try { sr = new StreamReader(fileName); }
            catch (Exception) { return("File not found: " + fileName); }

            try             // the finally below closes the file on every exit path, including exceptions
            {
                string header = sr.ReadLine();                 // read headers line
                if (header == null)                            // empty file, no header line at all
                {
                    return("PUBCHEM_CID column not found in data headers");
                }

                List <string> headers = Csv.SplitCsvString(header);
                int           cidIdx;

                for (cidIdx = 0; cidIdx < headers.Count; cidIdx++)
                {
                    if (headers[cidIdx].ToUpper() == "PUBCHEM_CID")
                    {
                        break;
                    }
                }
                if (cidIdx >= headers.Count)
                {
                    return("PUBCHEM_CID column not found in data headers");
                }

                Dictionary <string, MetaColumn> mcd = new Dictionary <string, MetaColumn>();

                foreach (MetaColumn mc2 in mt.MetaColumns)
                {
                    mcd[mc2.Name.ToUpper()] = mc2;             // build dict for quick metacolumn lookup
                }

                DbConnectionMx conn = DbConnectionMx.MapSqlToConnection(ref PubChemWarehouseTable);

                conn.BeginTransaction();                       // do multiple updates per transaction

                GenericDwDao dao = new GenericDwDao(
                    PubChemWarehouseTable,                     // table for results
                    PubChemWarehouseSeq);                      // sequence to use

                dao.BufferInserts(true);                       // buffer inserts for better speed

                SequenceDao.SetCacheSize(PubChemWarehouseSeq, 100); // number of ids to cache locally from sequence

                int methodId = Int32.Parse(mt.TableFilterValues[0]); // parsed once; used for the delete and for every inserted row
                dao.DeleteTable(methodId, true);               // remove existing data for this method

                int t1 = 0;

                while (true)
                {
                    int t2 = TimeOfDay.Milliseconds();
                    if (t2 - t1 > 1000)                        // refresh progress / poll for cancel about once a second
                    {
                        if (Progress.CancelRequested)
                        {
                            dao.ExecuteBufferedInserts();      // keep what has been loaded so far
                            conn.Commit();
                            conn.Close();
                            dao.Dispose();                     // same cleanup order as normal completion
                            Progress.Hide();
                            return(recCount.ToString() + " rows loaded");
                        }
                        Progress.Show("Loading file (" + recCount.ToString() + ") ...");
                        t1 = t2;
                    }

                    string rec = sr.ReadLine();
                    if (rec == null)
                    {
                        break;
                    }

                    List <string> vals = Csv.SplitCsvString(rec);

                    // Get compound id. A blank or short row has no CID field; treat it like
                    // an unparsable CID (log and skip) instead of indexing past the end.
                    string txtCid = (vals != null && cidIdx < vals.Count) ? vals[cidIdx] : null;
                    int    cid;
                    if (!Int32.TryParse(txtCid, out cid))
                    {
                        DebugLog.Message("Load PubChem bad CID " + (txtCid ?? "") + ", AID = " + aid);
                        continue;
                    }

                    long rslt_grp_id = dao.GetNextIdLong();    // id to hold row together
                    for (int vi = 0; vi < vals.Count; vi++)
                    {
                        string s = vals[vi];
                        if (s == "")
                        {
                            continue;
                        }
                        if (vi >= headers.Count)               // more fields than headers, nothing to map to
                        {
                            continue;
                        }

                        string mcName = headers[vi].ToUpper();
                        if (mcName.Length > 26)
                        {
                            mcName = mcName.Substring(0, 26);  // limit length to 26
                        }
                        if (mcName == "PUBCHEM_CID")
                        {
                            continue;
                        }

                        if (Lex.IsInteger(mcName))
                        {
                            mcName = "R_" + mcName;            // result number
                        }

                        // TryGetValue rather than the indexer: the indexer throws for a header
                        // that has no metacolumn, which would abort the whole load.
                        MetaColumn mc;
                        if (!mcd.TryGetValue(mcName, out mc) || mc == null)
                        {
                            continue;
                        }

                        AnnotationVo vo = new AnnotationVo();
                        vo.rslt_grp_id = rslt_grp_id;

                        if (mc.DataType == MetaColumnType.String)
                        {
                            vo.rslt_val_txt = s;
                        }

                        else if (mc.DataType == MetaColumnType.Number || mc.DataType == MetaColumnType.Integer)
                        {
                            double nbr;
                            if (!Double.TryParse(s, out nbr))
                            {
                                continue;                      // just continue if bad
                            }
                            vo.rslt_val_nbr = nbr;
                        }

                        else if (mc.DataType == MetaColumnType.Date)
                        {
                            s = DateTimeMx.Normalize(s);
                            if (s == null)
                            {
                                continue;
                            }
                            vo.rslt_val_dt = DateTimeMx.NormalizedToDateTime(s);
                        }

                        else if (mc.Name == "PUBCHEM_ACTIVITY_OUTCOME") // activity outcome is a dict value stored as an integer
                        {
                            int outcome;
                            if (!Int32.TryParse(s, out outcome))
                            {
                                continue;                      // just continue if bad
                            }
                            vo.rslt_val_nbr = outcome;
                        }

                        else if (mc.DataType == MetaColumnType.Hyperlink ||
                                 mc.DataType == MetaColumnType.DictionaryId)
                        {
                            vo.rslt_val_txt = s;
                        }

                        else
                        {
                            continue;
                        }

                        vo.ext_cmpnd_id_nbr = cid;
                        vo.ext_cmpnd_id_txt = cid.ToString();
                        vo.mthd_vrsn_id     = methodId;
                        vo.rslt_typ_id      = Int32.Parse(mc.PivotValues[0]);
                        vo.chng_op_cd       = "I";
                        vo.chng_usr_id      = Security.UserInfo.UserName;

                        dao.Insert(vo);
                    }                     // end of field loop

                    recCount++;
                    if (recCount % 100 == 0)
                    {                     // commit after group of updates
                        dao.ExecuteBufferedInserts();
                        conn.Commit();
                        conn.BeginTransaction();               // do multiple updates per transaction
                    }
                }                         // end of record loop

                dao.ExecuteBufferedInserts();
                conn.Commit();
                conn.Close();
                dao.Dispose();
            }
            finally
            {
                sr.Close();
            }

            // Add any missing CIDs under method 1000000

            Progress.Show("Updating CID table...");

            // NOTE(review): aid is concatenated directly into the SQL text below. It is expected
            // to be a numeric assay id; confirm callers never pass untrusted text, or bind it
            // as a parameter.
            string sql =
                "INSERT INTO " + PubChemWarehouseTable + "(ext_cmpnd_id_nbr,rslt_id,mthd_vrsn_id,rslt_typ_id,rslt_grp_id) " +
                "SELECT ext_cmpnd_id_nbr, " + PubChemWarehouseSeq + ".NEXTVAL,1000000,0,0 " +
                "FROM ( " +
                "SELECT UNIQUE ext_cmpnd_id_nbr " +
                "FROM " + PubChemWarehouseTable + " r1 " +
                "WHERE mthd_vrsn_id = " + aid + " " +
                "AND NOT EXISTS ( " +
                " SELECT * " +
                "FROM " + PubChemWarehouseTable + " r2 " +
                "WHERE mthd_vrsn_id = 1000000 " +
                "AND r2.ext_cmpnd_id_nbr = r1.ext_cmpnd_id_nbr) " +
                "and rownum <= 10000)";

            DbCommandMx drd     = new DbCommandMx();
            int         newCids = 0;

            try
            {
                drd.Prepare(sql);
                drd.BeginTransaction();

                while (true)                                   // each pass inserts at most 10000 CIDs (rownum limit); stop when none are added
                {
                    int addedCids = drd.ExecuteNonReader();
                    if (addedCids == 0)
                    {
                        break;
                    }
                    newCids += addedCids;
                    drd.Commit();
                    drd.BeginTransaction();                    // do multiple updates per transaction
                    Progress.Show("Updating CID table (" + newCids.ToString() + ")...");
                }
            }
            finally
            {
                drd.Dispose();
            }

            Progress.Hide();
            return(recCount.ToString() + " rows loaded for AID " + aid + " plus " + newCids.ToString() + " new CIDs");
        }