Example #1
0
/// <summary>
/// Load data into PubChem database
/// PubChem assay data files are downloaded from the PubChem site:
/// http://pubchem.ncbi.nlm.nih.gov/ using a program like SmartFTP.
/// The files are in GNU Zip (.gz) format and can be unzipped with
/// the following gzip commands:
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\description\*.gz
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\data\*.gz
/// After downloading and decompression this method can be called on the files.
/// </summary>
/// <param name="args"></param>
/// <returns></returns>

        public static string LoadData(
            string aid)
        {
            int recCount = 0;

            string    mtName = "PubChem_aid_" + aid;
            MetaTable mt     = MetaTableCollection.Get(mtName);

            if (mt == null)
            {
                return("Failed to get metatable");
            }

//			if (Math.Sqrt(4) == 2) goto UpdateCids;

            string       fileName = PubChemAssayDirectory + @"\CSV\Data\" + aid + ".csv";
            StreamReader sr;

            try { sr = new StreamReader(fileName); }
            catch (Exception ex) { return("File not found: " + fileName); }

            string        header  = sr.ReadLine();     // read headers line
            List <string> headers = Csv.SplitCsvString(header);
            int           cidIdx  = -1;

            for (cidIdx = 0; cidIdx < headers.Count; cidIdx++)
            {
                if (headers[cidIdx].ToUpper() == "PUBCHEM_CID")
                {
                    break;
                }
            }
            if (cidIdx >= headers.Count)
            {
                sr.Close();
                return("PUBCHEM_CID column not found in data headers");
            }

            Dictionary <string, MetaColumn> mcd = new Dictionary <string, MetaColumn>();

            foreach (MetaColumn mc2 in mt.MetaColumns)
            {
                mcd[mc2.Name.ToUpper()] = mc2;                 // build dict for quick metacolumn lookup
            }
            DbConnectionMx conn = DbConnectionMx.MapSqlToConnection(ref PubChemWarehouseTable);

            conn.BeginTransaction();             // do multiple updates per transaction

            GenericDwDao dao = new GenericDwDao(
                PubChemWarehouseTable,                          // table for results
                PubChemWarehouseSeq);                           // sequence to use

            dao.BufferInserts(true);                            // buffer inserts for better speed

            SequenceDao.SetCacheSize(PubChemWarehouseSeq, 100); // number of ids to cache locally from sequence

            //string progressMsg = "Deleting existing data...";
            int i1 = dao.DeleteTable(Int32.Parse(mt.TableFilterValues[0]), true);

            //if (Progress.CancelRequested())
            //{
            //  dao.Dispose();
            //  return "Cancelled during data delete";
            //}

            //Progress.Show("Loading file...");

            recCount = 0;
            int t1 = 0;

            while (true)
            {
                int t2 = TimeOfDay.Milliseconds();
                if (t2 - t1 > 1000)
                {
                    if (Progress.CancelRequested)
                    {
                        dao.ExecuteBufferedInserts();
                        conn.Commit();
                        conn.Close();
                        sr.Close();
                        Progress.Hide();
                        return(recCount.ToString() + " rows loaded");
                    }
                    Progress.Show("Loading file (" + recCount.ToString() + ") ...");
                    t1 = t2;
                }

                string rec = sr.ReadLine();
                if (rec == null)
                {
                    break;
                }
                List <string> vals = Csv.SplitCsvString(rec);
                int           cid;
                try { cid = Int32.Parse(vals[cidIdx]); }                 // get compound id
                catch (Exception ex)
                {
                    string txtCid = vals[cidIdx];
                    if (txtCid == null)
                    {
                        txtCid = "";
                    }
                    DebugLog.Message("Load PubChem bad CID " + txtCid + ", AID = " + aid);
                    continue;
                }

                long rslt_grp_id = dao.GetNextIdLong();                 // id to hold row together
                for (int vi = 0; vi < vals.Count; vi++)
                {
                    string s = vals[vi];
                    if (s == "")
                    {
                        continue;
                    }
                    string[] sa = rec.Split(',');
                    if (vi >= headers.Count)
                    {
                        continue;
                    }
                    string mcName = headers[vi].ToUpper();
                    if (mcName.Length > 26)
                    {
                        mcName = mcName.Substring(0, 26);                                         // limit length to 26
                    }
                    if (mcName == "PUBCHEM_CID")
                    {
                        continue;
                    }

                    if (Lex.IsInteger(mcName))
                    {
                        mcName = "R_" + mcName;                                            // result number
                    }
                    MetaColumn mc = mcd[mcName];
                    if (mc == null)
                    {
                        continue;
                    }

                    AnnotationVo vo = new AnnotationVo();
                    vo.rslt_grp_id = rslt_grp_id;

                    if (mc.DataType == MetaColumnType.String)
                    {
                        vo.rslt_val_txt = s;
                    }

                    else if (mc.DataType == MetaColumnType.Number || mc.DataType == MetaColumnType.Integer)
                    {
                        try
                        {
                            vo.rslt_val_nbr = Convert.ToDouble(s);
                        }
                        catch (Exception e) { continue; }                         // just continue if bad
                    }

                    else if (mc.DataType == MetaColumnType.Date)
                    {
                        s = DateTimeMx.Normalize(s);
                        if (s == null)
                        {
                            continue;
                        }
                        vo.rslt_val_dt = DateTimeMx.NormalizedToDateTime(s);
                    }

                    else if (mc.Name == "PUBCHEM_ACTIVITY_OUTCOME")                     // activity outcome is a dict value stored as an integer
                    {
                        try
                        {
                            vo.rslt_val_nbr = Convert.ToInt32(s);
                        }
                        catch (Exception e) { continue; }                         // just continue if bad
                    }

                    else if (mc.DataType == MetaColumnType.Hyperlink ||
                             mc.DataType == MetaColumnType.DictionaryId)
                    {
                        vo.rslt_val_txt = s;
                    }

                    else
                    {
                        continue;
                    }

                    vo.ext_cmpnd_id_nbr = cid;
                    vo.ext_cmpnd_id_txt = cid.ToString();
                    vo.mthd_vrsn_id     = Int32.Parse(mt.TableFilterValues[0]);
                    vo.rslt_typ_id      = Int32.Parse(mc.PivotValues[0]);
                    vo.chng_op_cd       = "I";
                    vo.chng_usr_id      = Security.UserInfo.UserName;

                    dao.Insert(vo);
                }                 // end of field loop

                recCount++;
                if (recCount % 100 == 0)
                {                 // commit after group of updates
                    dao.ExecuteBufferedInserts();
                    conn.Commit();
                    conn.BeginTransaction(); // do multiple updates per transaction
                }
            }                                // end of record loop

            dao.ExecuteBufferedInserts();
            conn.Commit();
            conn.Close();
            dao.Dispose();
            sr.Close();

//UpdateCids: // Add any missing CIDs under method 1000000

            Progress.Show("Updating CID table...");

            string sql =
                "INSERT INTO " + PubChemWarehouseTable + "(ext_cmpnd_id_nbr,rslt_id,mthd_vrsn_id,rslt_typ_id,rslt_grp_id) " +
                "SELECT ext_cmpnd_id_nbr, " + PubChemWarehouseSeq + ".NEXTVAL,1000000,0,0 " +
                "FROM ( " +
                "SELECT UNIQUE ext_cmpnd_id_nbr " +
                "FROM " + PubChemWarehouseTable + " r1 " +
                "WHERE mthd_vrsn_id = " + aid + " " +
                "AND NOT EXISTS ( " +
                " SELECT * " +
                "FROM " + PubChemWarehouseTable + " r2 " +
                "WHERE mthd_vrsn_id = 1000000 " +
                "AND r2.ext_cmpnd_id_nbr = r1.ext_cmpnd_id_nbr) " +
                "and rownum <= 10000)";

            DbCommandMx drd = new DbCommandMx();

            drd.Prepare(sql);
            drd.BeginTransaction();

            int newCids = 0;

            while (true)
            {
                int addedCids = drd.ExecuteNonReader();
                if (addedCids == 0)
                {
                    break;
                }
                newCids += addedCids;
                drd.Commit();
                drd.BeginTransaction();                 // do multiple updates per transaction
                Progress.Show("Updating CID table (" + newCids.ToString() + ")...");
            }

            drd.Dispose();

            Progress.Hide();
            return(recCount.ToString() + " rows loaded for AID " + aid + " plus " + newCids.ToString() + " new CIDs");
        }
Example #2
0
        private static Alert DeserializeHeaderOld(
            UserObject uo,
            bool fast)
        {
            string[] sa;

            Alert a = new Alert();

            a.Id = uo.Id;

            //if (uo.Description.Trim().Length > 0 && uo.Description.Split('\t').Length == 2)
            //{ // sometimes the desc contains just the queryId \t lastNewDate
            //  sa = uo.Description.Split('\t');
            //  string qidString = sa[0].Substring(sa[0].IndexOf(" ") + 1);
            //  a.QueryObjId = int.Parse(qidString);
            //  a.LastNewData = DateTimeEx.DateTimeParseUS(sa[1]);
            //  return a;
            //}

            if (uo.Description.Trim().Length > 0)
            {             // have old new-style info?
                try
                {
                    sa           = uo.Description.Split('\t');
                    a.Owner      = sa[0];
                    a.QueryObjId = Int32.Parse(sa[1]);
                    a.Interval   = Int32.Parse(sa[2]);
                    a.MailTo     = sa[3];
                    if (a.MailTo == null || a.MailTo.Trim().Length == 0)                     // need to get email address
                    {
                        a.MailTo = Security.GetUserEmailAddress(a.Owner);
                    }

                    a.LastCheck        = DateTimeMx.NormalizedToDateTime(sa[4]);
                    a.NewCompounds     = Int32.Parse(sa[5]);
                    a.ChangedCompounds = Int32.Parse(sa[6]);
                    a.TotalCompounds   = Int32.Parse(sa[7]);
                    a.NewRows          = Int32.Parse(sa[8]);
                    a.TotalRows        = Int32.Parse(sa[9]);
                    if (sa.Length > 10)
                    {
                        a.CheckTablesWithCriteriaOnly = bool.Parse(sa[10]);
                    }
                    if (sa.Length > 11)
                    {
                        a.LastNewData = DateTimeMx.NormalizedToDateTime(sa[11]);
                    }
                }
                catch (Exception ex) { throw new Exception(ex.Message, ex); }                 // debug
            }

            else             // old style alert, get partial header info
            {
                a.Owner      = uo.Owner;
                a.QueryObjId = Int32.Parse(uo.Name.Substring(uo.Name.IndexOf("_") + 1));
                a.Interval   = 1;               // assume just 1 day
                if (!fast)
                {
                    try                     // get old last check date
                    {
                        Query q = QbUtil.ReadQuery(a.QueryObjId);
                        if (q.AlertInterval > 0)
                        {
                            a.Interval = q.AlertInterval;
                        }
                        sa          = uo.Content.Split('\t');
                        a.LastCheck = DateTimeMx.NormalizedToDateTime(sa[4]);
                    }
                    catch (Exception ex) { string msg = ex.Message; }
                }

                a.MailTo = Security.GetUserEmailAddress(a.Owner);                 // need to get email address
            }

            return(a);
        }