/// <summary>
/// Starts this object against the specified SC application: stores the
/// application reference and picks up the application's SequenceDao.
/// </summary>
/// <param name="scApp">The SC application whose SequenceDao is used.</param>
public void start(SCApplication scApp)
{
    // Keep the application reference first, then take the DAO it exposes.
    SCApplication application = scApp;
    this.scApp = application;
    seqDao = application.SequenceDao;
}
/// <summary>
/// Creates the business object on the given context. The context is passed
/// to the base class and is also used to construct the sequence DAO.
/// </summary>
/// <param name="mardisContext">Context handed to the base class and to the new SequenceDao.</param>
public SequenceBusiness(MardisContext mardisContext)
    : base(mardisContext)
{
    SequenceDao sequenceDao = new SequenceDao(mardisContext);
    _sequenceDao = sequenceDao;
}
/// <summary>
/// Load the result data file for one PubChem assay into the PubChem warehouse table.
/// PubChem assay data files are downloaded from the PubChem site:
/// http://pubchem.ncbi.nlm.nih.gov/ using a program like SmartFTP.
/// The files are in GNU Zip (.gz) format and can be unzipped with
/// the following gzip commands:
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\description\*.gz
///  c:\gzip\gzip -d c:\pubchem\bioassay\csv\data\*.gz
/// After downloading and decompression this method can be called on the files.
/// Existing rows for the assay's method version are deleted first, the CSV rows are
/// inserted (committing every 100 records), and finally any compound ids that are
/// not yet listed under method version 1000000 are added.
/// </summary>
/// <param name="aid">
/// PubChem assay id. It is appended to "PubChem_aid_" to find the metatable, used as
/// the CSV file name, and used as the method version id in the final CID update, so
/// it must parse as an integer.
/// </param>
/// <returns>
/// A status message: the number of rows loaded (and new CIDs added), or a description
/// of why nothing was loaded (metatable missing, invalid AID, file not opened,
/// PUBCHEM_CID column missing). If the user cancels, the rows loaded so far are
/// committed and reported.
/// </returns>
public static string LoadData(
    string aid)
{
    const int CommitInterval = 100;      // records per transaction
    const int ProgressIntervalMs = 1000; // minimum time between progress/cancel checks

    string mtName = "PubChem_aid_" + aid;
    MetaTable mt = MetaTableCollection.Get(mtName);
    if (mt == null)
    {
        return "Failed to get metatable";
    }

    // The AID ends up in a file path and in SQL text, so insist that it is a plain integer.
    int aidNbr;
    if (!Int32.TryParse(aid, out aidNbr))
    {
        return "Invalid AID: " + aid;
    }

    string fileName = PubChemAssayDirectory + @"\CSV\Data\" + aid + ".csv";
    StreamReader sr;
    try
    {
        sr = new StreamReader(fileName);
    }
    catch (Exception)
    {
        return "File not found: " + fileName;
    }

    int recCount = 0;
    bool cancelled = false;

    using (sr) // reader is closed on every exit path, including exceptions
    {
        string header = sr.ReadLine(); // read headers line (null if the file is empty)
        List<string> headers = (header != null) ? Csv.SplitCsvString(header) : new List<string>();

        int cidIdx;
        for (cidIdx = 0; cidIdx < headers.Count; cidIdx++)
        {
            if (String.Equals(headers[cidIdx], "PUBCHEM_CID", StringComparison.OrdinalIgnoreCase))
            {
                break;
            }
        }

        if (cidIdx >= headers.Count)
        {
            return "PUBCHEM_CID column not found in data headers";
        }

        // Resolve each CSV column to its metacolumn once instead of once per field.
        MetaColumn[] fieldColumns = MapPubChemHeadersToMetaColumns(headers, mt);
        int methodVersionId = Int32.Parse(mt.TableFilterValues[0]);

        DbConnectionMx conn = DbConnectionMx.MapSqlToConnection(ref PubChemWarehouseTable);
        GenericDwDao dao = null;
        try
        {
            conn.BeginTransaction(); // do multiple updates per transaction
            dao = new GenericDwDao(
                PubChemWarehouseTable, // table for results
                PubChemWarehouseSeq);  // sequence to use
            dao.BufferInserts(true); // buffer inserts for better speed
            SequenceDao.SetCacheSize(PubChemWarehouseSeq, 100); // number of ids to cache locally from sequence

            // Remove any rows previously loaded for this method version.
            dao.DeleteTable(methodVersionId, true);

            int t1 = 0;
            while (true)
            {
                int t2 = TimeOfDay.Milliseconds();
                if (t2 - t1 > ProgressIntervalMs)
                {
                    if (Progress.CancelRequested)
                    {
                        cancelled = true; // rows read so far are still flushed and committed below
                        break;
                    }

                    Progress.Show("Loading file (" + recCount.ToString() + ") ...");
                    t1 = t2;
                }

                string rec = sr.ReadLine();
                if (rec == null)
                {
                    break;
                }

                List<string> vals = Csv.SplitCsvString(rec);

                // Short or blank rows may not reach the CID column at all.
                string txtCid = (cidIdx < vals.Count) ? vals[cidIdx] : null;
                int cid; // compound id
                if (!Int32.TryParse(txtCid, out cid))
                {
                    DebugLog.Message("Load PubChem bad CID " + (txtCid ?? "") + ", AID = " + aid);
                    continue;
                }

                long rslt_grp_id = dao.GetNextIdLong(); // id to hold row together

                // Values beyond the last header have no column to map to and are ignored.
                int fieldCount = Math.Min(vals.Count, fieldColumns.Length);
                for (int vi = 0; vi < fieldCount; vi++)
                {
                    string s = vals[vi];
                    if (String.IsNullOrEmpty(s))
                    {
                        continue;
                    }

                    MetaColumn mc = fieldColumns[vi];
                    if (mc == null)
                    {
                        continue; // CID column or a column with no matching metacolumn
                    }

                    AnnotationVo vo = new AnnotationVo();
                    vo.rslt_grp_id = rslt_grp_id;
                    if (!TrySetPubChemResultValue(vo, mc, s))
                    {
                        continue; // unsupported column type or unparsable value
                    }

                    vo.ext_cmpnd_id_nbr = cid;
                    vo.ext_cmpnd_id_txt = cid.ToString();
                    vo.mthd_vrsn_id = methodVersionId;
                    vo.rslt_typ_id = Int32.Parse(mc.PivotValues[0]);
                    vo.chng_op_cd = "I";
                    vo.chng_usr_id = Security.UserInfo.UserName;

                    dao.Insert(vo);
                } // end of field loop

                recCount++;
                if (recCount % CommitInterval == 0)
                { // commit after group of updates
                    dao.ExecuteBufferedInserts();
                    conn.Commit();
                    conn.BeginTransaction(); // do multiple updates per transaction
                }
            } // end of record loop

            dao.ExecuteBufferedInserts();
            conn.Commit();
        }
        finally
        {
            // Release the connection and DAO even when loading fails part way through.
            try
            {
                conn.Close();
            }
            finally
            {
                if (dao != null)
                {
                    dao.Dispose();
                }
            }
        }
    }

    if (cancelled)
    {
        Progress.Hide();
        return recCount.ToString() + " rows loaded";
    }

    // Add any missing CIDs under method 1000000
    Progress.Show("Updating CID table...");
    int newCids = AddMissingPubChemCids(aidNbr);
    Progress.Hide();

    return recCount.ToString() + " rows loaded for AID " + aid + " plus " + newCids.ToString() + " new CIDs";
}

/// <summary>
/// Builds, for each CSV header position, the metacolumn that receives that column's values
/// (null where the column is the CID column or has no matching metacolumn).
/// </summary>
private static MetaColumn[] MapPubChemHeadersToMetaColumns(
    List<string> headers,
    MetaTable mt)
{
    const int MaxColumnNameLength = 26; // header names are cut to this length before lookup

    // Dictionary keyed by upper-cased metacolumn name for quick lookup.
    Dictionary<string, MetaColumn> mcd = new Dictionary<string, MetaColumn>();
    foreach (MetaColumn mc2 in mt.MetaColumns)
    {
        mcd[mc2.Name.ToUpperInvariant()] = mc2;
    }

    MetaColumn[] columns = new MetaColumn[headers.Count];
    for (int hi = 0; hi < headers.Count; hi++)
    {
        string mcName = headers[hi];
        if (mcName == null)
        {
            continue;
        }

        mcName = mcName.ToUpperInvariant();
        if (mcName.Length > MaxColumnNameLength)
        {
            mcName = mcName.Substring(0, MaxColumnNameLength);
        }

        if (mcName == "PUBCHEM_CID")
        {
            continue; // the compound id is stored on every row, not as a result value
        }

        if (Lex.IsInteger(mcName))
        {
            mcName = "R_" + mcName; // result number
        }

        // A header without a metacolumn is skipped rather than raising KeyNotFoundException.
        MetaColumn mc;
        if (mcd.TryGetValue(mcName, out mc))
        {
            columns[hi] = mc;
        }
    }

    return columns;
}

/// <summary>
/// Stores the text value s in the field of vo that matches the metacolumn's data type;
/// returns false when the type is not handled or the value cannot be converted.
/// </summary>
private static bool TrySetPubChemResultValue(
    AnnotationVo vo,
    MetaColumn mc,
    string s)
{
    if (mc.DataType == MetaColumnType.String)
    {
        vo.rslt_val_txt = s;
        return true;
    }

    if (mc.DataType == MetaColumnType.Number || mc.DataType == MetaColumnType.Integer)
    {
        double number;
        if (!Double.TryParse(s, out number))
        {
            return false; // just skip if bad
        }

        vo.rslt_val_nbr = number;
        return true;
    }

    if (mc.DataType == MetaColumnType.Date)
    {
        string normalized = DateTimeMx.Normalize(s);
        if (normalized == null)
        {
            return false;
        }

        vo.rslt_val_dt = DateTimeMx.NormalizedToDateTime(normalized);
        return true;
    }

    if (mc.Name == "PUBCHEM_ACTIVITY_OUTCOME")
    { // activity outcome is a dict value stored as an integer
        int outcome;
        if (!Int32.TryParse(s, out outcome))
        {
            return false; // just skip if bad
        }

        vo.rslt_val_nbr = outcome;
        return true;
    }

    if (mc.DataType == MetaColumnType.Hyperlink || mc.DataType == MetaColumnType.DictionaryId)
    {
        vo.rslt_val_txt = s;
        return true;
    }

    return false;
}

/// <summary>
/// Inserts, in batches of up to 10000, the compound ids of the given assay that do not yet
/// have a row under method version 1000000; returns the number of rows added.
/// </summary>
private static int AddMissingPubChemCids(
    int aidNbr)
{
    string sql =
        "INSERT INTO " + PubChemWarehouseTable + "(ext_cmpnd_id_nbr,rslt_id,mthd_vrsn_id,rslt_typ_id,rslt_grp_id) " +
        "SELECT ext_cmpnd_id_nbr, " + PubChemWarehouseSeq + ".NEXTVAL,1000000,0,0 " +
        "FROM ( " +
        "SELECT UNIQUE ext_cmpnd_id_nbr " +
        "FROM " + PubChemWarehouseTable + " r1 " +
        "WHERE mthd_vrsn_id = " + aidNbr.ToString(System.Globalization.CultureInfo.InvariantCulture) + " " +
        "AND NOT EXISTS ( " +
        " SELECT * " +
        "FROM " + PubChemWarehouseTable + " r2 " +
        "WHERE mthd_vrsn_id = 1000000 " +
        "AND r2.ext_cmpnd_id_nbr = r1.ext_cmpnd_id_nbr) " +
        "and rownum <= 10000)";

    int newCids = 0;
    DbCommandMx drd = new DbCommandMx();
    try
    {
        drd.Prepare(sql);
        drd.BeginTransaction();

        // Re-run the statement until a pass adds nothing; each pass is committed separately.
        while (true)
        {
            int addedCids = drd.ExecuteNonReader();
            if (addedCids == 0)
            {
                break;
            }

            newCids += addedCids;
            drd.Commit();
            drd.BeginTransaction(); // do multiple updates per transaction
            Progress.Show("Updating CID table (" + newCids.ToString() + ")...");
        }
    }
    finally
    {
        drd.Dispose();
    }

    return newCids;
}