Пример #1
0
 /// <summary>
 /// Binds this object to the supplied application: stores the application
 /// reference and picks up the application's <c>SequenceDao</c>.
 /// </summary>
 /// <param name="scApp">The application whose <c>SequenceDao</c> is used from now on.</param>
 public void start(SCApplication scApp)
 {
     // Store the application first, then read the DAO from it.
     this.scApp = scApp;

     seqDao = scApp.SequenceDao;
 }
Пример #2
0
 /// <summary>
 /// Creates the business object for the given context. The context is forwarded to the
 /// base constructor and is also used to build the <c>SequenceDao</c> stored in
 /// <c>_sequenceDao</c>.
 /// </summary>
 /// <param name="mardisContext">Context handed to the base class and to the new <c>SequenceDao</c>.</param>
 public SequenceBusiness(MardisContext mardisContext)
     : base(mardisContext)
 {
     _sequenceDao = new SequenceDao(mardisContext);
 }
Пример #3
0
/// <summary>
/// Load the data file of one PubChem assay into the PubChem warehouse table.
/// PubChem assay data files are downloaded from the PubChem site:
/// http://pubchem.ncbi.nlm.nih.gov/ using a program like SmartFTP.
/// The files are in GNU Zip (.gz) format and can be unzipped with
/// the following gzip commands:
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\description\*.gz
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\data\*.gz
/// After downloading and decompression this method can be called on the files.
/// Existing rows for the assay's method version are deleted first, then one
/// warehouse row is inserted per non-empty, recognized field of each CSV record.
/// Finally any compound ids of this assay that are missing under method
/// version 1000000 are added to the warehouse table.
/// </summary>
/// <param name="aid">Assay id; selects metatable "PubChem_aid_" + aid and the data file aid + ".csv".</param>
/// <returns>
/// A status message: a failure description (metatable missing, file not opened,
/// PUBCHEM_CID column missing), the row count if the load was cancelled, or the
/// row count plus the number of newly added CIDs on completion.
/// </returns>

        public static string LoadData(
            string aid)
        {
            int recCount = 0;

            string    mtName = "PubChem_aid_" + aid;
            MetaTable mt     = MetaTableCollection.Get(mtName);

            if (mt == null)
            {
                return("Failed to get metatable");
            }

            string       fileName = PubChemAssayDirectory + @"\CSV\Data\" + aid + ".csv";
            StreamReader sr;

            try { sr = new StreamReader(fileName); }
            catch (Exception) { return("File not found: " + fileName); }

            try // the finally below closes the reader on every exit path, including exceptions
            {
                string header = sr.ReadLine();     // read headers line
                if (header == null)
                {   // empty file: there is no header row that could contain the CID column
                    return("PUBCHEM_CID column not found in data headers");
                }

                List <string> headers = Csv.SplitCsvString(header);
                int           cidIdx;

                for (cidIdx = 0; cidIdx < headers.Count; cidIdx++)
                {
                    if (headers[cidIdx].ToUpper() == "PUBCHEM_CID")
                    {
                        break;
                    }
                }
                if (cidIdx >= headers.Count)
                {
                    return("PUBCHEM_CID column not found in data headers");
                }

                Dictionary <string, MetaColumn> mcd = new Dictionary <string, MetaColumn>();

                foreach (MetaColumn mc2 in mt.MetaColumns)
                {
                    mcd[mc2.Name.ToUpper()] = mc2;                 // build dict for quick metacolumn lookup
                }
                DbConnectionMx conn = DbConnectionMx.MapSqlToConnection(ref PubChemWarehouseTable);

                conn.BeginTransaction();             // do multiple updates per transaction

                GenericDwDao dao = new GenericDwDao(
                    PubChemWarehouseTable,                          // table for results
                    PubChemWarehouseSeq);                           // sequence to use

                dao.BufferInserts(true);                            // buffer inserts for better speed

                SequenceDao.SetCacheSize(PubChemWarehouseSeq, 100); // number of ids to cache locally from sequence

                // Method version id is the same for every row of this assay: parse it once
                int methodVersionId = Int32.Parse(mt.TableFilterValues[0]);

                dao.DeleteTable(methodVersionId, true);             // remove existing data for this method version

                int t1 = 0;

                while (true)
                {
                    // Refresh progress / poll for cancel at most about once per second
                    int t2 = TimeOfDay.Milliseconds();
                    if (t2 - t1 > 1000)
                    {
                        if (Progress.CancelRequested)
                        {   // keep what has been loaded so far and release resources as on normal completion
                            dao.ExecuteBufferedInserts();
                            conn.Commit();
                            conn.Close();
                            dao.Dispose();
                            Progress.Hide();
                            return(recCount.ToString() + " rows loaded");
                        }
                        Progress.Show("Loading file (" + recCount.ToString() + ") ...");
                        t1 = t2;
                    }

                    string rec = sr.ReadLine();
                    if (rec == null)
                    {
                        break;
                    }
                    List <string> vals = Csv.SplitCsvString(rec);

                    // Get compound id; a short row may not reach the CID column at all
                    string txtCid = (cidIdx < vals.Count) ? vals[cidIdx] : null;
                    int    cid;
                    if (!Int32.TryParse(txtCid, out cid))
                    {
                        DebugLog.Message("Load PubChem bad CID " + (txtCid ?? "") + ", AID = " + aid);
                        continue;
                    }

                    long rslt_grp_id = dao.GetNextIdLong();                 // id to hold row together
                    for (int vi = 0; vi < vals.Count; vi++)
                    {
                        string s = vals[vi];
                        if (s == "")
                        {
                            continue;
                        }
                        if (vi >= headers.Count)
                        {
                            continue;                                       // value without a header
                        }
                        string mcName = headers[vi].ToUpper();
                        if (mcName.Length > 26)
                        {
                            mcName = mcName.Substring(0, 26);               // limit length to 26
                        }
                        if (mcName == "PUBCHEM_CID")
                        {
                            continue;
                        }

                        if (Lex.IsInteger(mcName))
                        {
                            mcName = "R_" + mcName;                         // result number
                        }

                        // Skip columns the metatable does not know (the indexer would throw instead)
                        MetaColumn mc;
                        if (!mcd.TryGetValue(mcName, out mc) || mc == null)
                        {
                            continue;
                        }

                        AnnotationVo vo = new AnnotationVo();
                        vo.rslt_grp_id = rslt_grp_id;

                        if (mc.DataType == MetaColumnType.String)
                        {
                            vo.rslt_val_txt = s;
                        }

                        else if (mc.DataType == MetaColumnType.Number || mc.DataType == MetaColumnType.Integer)
                        {
                            try
                            {
                                vo.rslt_val_nbr = Convert.ToDouble(s);
                            }
                            catch (Exception) { continue; }                 // just continue if bad
                        }

                        else if (mc.DataType == MetaColumnType.Date)
                        {
                            s = DateTimeMx.Normalize(s);
                            if (s == null)
                            {
                                continue;
                            }
                            vo.rslt_val_dt = DateTimeMx.NormalizedToDateTime(s);
                        }

                        else if (mc.Name == "PUBCHEM_ACTIVITY_OUTCOME")     // activity outcome is a dict value stored as an integer
                        {
                            try
                            {
                                vo.rslt_val_nbr = Convert.ToInt32(s);
                            }
                            catch (Exception) { continue; }                 // just continue if bad
                        }

                        else if (mc.DataType == MetaColumnType.Hyperlink ||
                                 mc.DataType == MetaColumnType.DictionaryId)
                        {
                            vo.rslt_val_txt = s;
                        }

                        else
                        {
                            continue;
                        }

                        vo.ext_cmpnd_id_nbr = cid;
                        vo.ext_cmpnd_id_txt = cid.ToString();
                        vo.mthd_vrsn_id     = methodVersionId;
                        vo.rslt_typ_id      = Int32.Parse(mc.PivotValues[0]);
                        vo.chng_op_cd       = "I";
                        vo.chng_usr_id      = Security.UserInfo.UserName;

                        dao.Insert(vo);
                    }                 // end of field loop

                    recCount++;
                    if (recCount % 100 == 0)
                    {                 // commit after group of updates
                        dao.ExecuteBufferedInserts();
                        conn.Commit();
                        conn.BeginTransaction(); // do multiple updates per transaction
                    }
                }                                // end of record loop

                dao.ExecuteBufferedInserts();
                conn.Commit();
                conn.Close();
                dao.Dispose();
            }
            finally
            {
                sr.Close();
            }

            // Add any missing CIDs under method 1000000

            Progress.Show("Updating CID table...");

            // NOTE(review): aid is concatenated into the SQL text. If aid can come from
            // untrusted input it should be validated as an integer or bound as a parameter.
            // Each execution inserts at most 10000 rows (rownum limit), hence the loop below.
            string sql =
                "INSERT INTO " + PubChemWarehouseTable + "(ext_cmpnd_id_nbr,rslt_id,mthd_vrsn_id,rslt_typ_id,rslt_grp_id) " +
                "SELECT ext_cmpnd_id_nbr, " + PubChemWarehouseSeq + ".NEXTVAL,1000000,0,0 " +
                "FROM ( " +
                "SELECT UNIQUE ext_cmpnd_id_nbr " +
                "FROM " + PubChemWarehouseTable + " r1 " +
                "WHERE mthd_vrsn_id = " + aid + " " +
                "AND NOT EXISTS ( " +
                " SELECT * " +
                "FROM " + PubChemWarehouseTable + " r2 " +
                "WHERE mthd_vrsn_id = 1000000 " +
                "AND r2.ext_cmpnd_id_nbr = r1.ext_cmpnd_id_nbr) " +
                "and rownum <= 10000)";

            int         newCids = 0;
            DbCommandMx drd     = new DbCommandMx();

            try
            {
                drd.Prepare(sql);
                drd.BeginTransaction();

                // Repeat the insert until it adds no more rows
                while (true)
                {
                    int addedCids = drd.ExecuteNonReader();
                    if (addedCids == 0)
                    {
                        break;
                    }
                    newCids += addedCids;
                    drd.Commit();
                    drd.BeginTransaction();                 // do multiple updates per transaction
                    Progress.Show("Updating CID table (" + newCids.ToString() + ")...");
                }
            }
            finally
            {
                drd.Dispose();
            }

            Progress.Hide();
            return(recCount.ToString() + " rows loaded for AID " + aid + " plus " + newCids.ToString() + " new CIDs");
        }