/// <summary>
/// Creates a record reader for an Oracle database. All arguments are handed to
/// the base constructor unchanged; afterwards <c>SetSessionTimeZone</c> is
/// invoked with <paramref name="conf"/> and <paramref name="conn"/>.
/// </summary>
/// <param name="split">Input split forwarded to the base reader.</param>
/// <param name="inputClass">Input class forwarded to the base reader.</param>
/// <param name="conf">Configuration forwarded to the base reader and to <c>SetSessionTimeZone</c>.</param>
/// <param name="conn">Connection forwarded to the base reader and to <c>SetSessionTimeZone</c>.</param>
/// <param name="dbConfig">DB configuration forwarded to the base reader.</param>
/// <param name="cond">Conditions string forwarded to the base reader.</param>
/// <param name="fields">Field names forwarded to the base reader.</param>
/// <param name="table">Table name forwarded to the base reader.</param>
/// <exception cref="Java.Sql.SQLException"/>
public OracleDBRecordReader(
    DBInputFormat.DBInputSplit split,
    Type inputClass,
    Configuration conf,
    Connection conn,
    DBConfiguration dbConfig,
    string cond,
    string[] fields,
    string table)
    : base(split, inputClass, conf, conn, dbConfig, cond, fields, table)
{
    SetSessionTimeZone(conf, conn);
}
/// <summary>
/// Creates the record reader for one split, selecting the implementation from
/// the database product name returned by <c>GetDBProductName()</c>.
/// </summary>
/// <param name="split">The split the reader is created for.</param>
/// <param name="conf">Configuration passed through to the reader.</param>
/// <returns>
/// A <c>MySQLDataDrivenDBRecordReader</c> when the product name starts with
/// "MYSQL", otherwise a generic <c>DataDrivenDBRecordReader</c>.
/// </returns>
/// <exception cref="System.IO.IOException">
/// Thrown when creating the reader raises a <c>SQLException</c>; the original
/// exception is attached as the inner exception.
/// </exception>
protected internal override RecordReader<LongWritable, T> CreateDBRecordReader(DBInputFormat.DBInputSplit split, Configuration conf)
{
    DBConfiguration dbConf = GetDBConf();
    Type inputClass = (Type)(dbConf.GetInputClass());
    string dbProductName = GetDBProductName();
    Log.Debug("Creating db record reader for db product: " + dbProductName);
    try
    {
        // Use the database product name to determine the appropriate record reader.
        // Ordinal comparison: the product name is an identifier, not linguistic text.
        if (dbProductName.StartsWith("MYSQL", StringComparison.Ordinal))
        {
            // Use the MySQL-specific db reader.
            return new MySQLDataDrivenDBRecordReader<T>(
                split,
                inputClass,
                conf,
                CreateConnection(),
                dbConf,
                dbConf.GetInputConditions(),
                dbConf.GetInputFieldNames(),
                dbConf.GetInputTableName());
        }

        // Generic reader.
        return new DataDrivenDBRecordReader<T>(
            split,
            inputClass,
            conf,
            CreateConnection(),
            dbConf,
            dbConf.GetInputConditions(),
            dbConf.GetInputFieldNames(),
            dbConf.GetInputTableName(),
            dbProductName);
    }
    catch (SQLException ex)
    {
        // Keep the SQLException as the inner exception so its stack trace is not lost.
        throw new IOException(ex.Message, ex);
    }
}
/// <summary>
/// Creates a data-driven record reader for Oracle. The base constructor receives
/// all arguments plus the fixed product name "ORACLE"; afterwards the session
/// time zone helper of <c>OracleDBRecordReader</c> is invoked.
/// </summary>
/// <param name="split">Input split forwarded to the base reader.</param>
/// <param name="inputClass">Input class forwarded to the base reader.</param>
/// <param name="conf">Configuration forwarded to the base reader and the time zone helper.</param>
/// <param name="conn">Connection forwarded to the base reader and the time zone helper.</param>
/// <param name="dbConfig">DB configuration forwarded to the base reader.</param>
/// <param name="cond">Conditions string forwarded to the base reader.</param>
/// <param name="fields">Field names forwarded to the base reader.</param>
/// <param name="table">Table name forwarded to the base reader.</param>
/// <exception cref="Java.Sql.SQLException"/>
public OracleDataDrivenDBRecordReader(
    DBInputFormat.DBInputSplit split,
    Type inputClass,
    Configuration conf,
    Connection conn,
    DBConfiguration dbConfig,
    string cond,
    string[] fields,
    string table)
    : base(split, inputClass, conf, conn, dbConfig, cond, fields, table, "ORACLE")
{
    // Oracle requires the time zone used by the connection to be initialized.
    OracleDBRecordReader.SetSessionTimeZone(conf, conn);
}
/// <summary>
/// Creates a data-driven record reader and remembers the database product name.
/// </summary>
/// <param name="split">The InputSplit to read data for</param>
/// <param name="inputClass">Input class forwarded to the base reader.</param>
/// <param name="conf">Configuration forwarded to the base reader.</param>
/// <param name="conn">Connection forwarded to the base reader.</param>
/// <param name="dbConfig">DB configuration forwarded to the base reader.</param>
/// <param name="cond">Conditions string forwarded to the base reader.</param>
/// <param name="fields">Field names forwarded to the base reader.</param>
/// <param name="table">Table name forwarded to the base reader.</param>
/// <param name="dbProduct">Database product name stored in <c>dbProductName</c>.</param>
/// <exception cref="Java.Sql.SQLException"></exception>
public DataDrivenDBRecordReader(
    DBInputFormat.DBInputSplit split,
    Type inputClass,
    Configuration conf,
    Connection conn,
    DBConfiguration dbConfig,
    string cond,
    string[] fields,
    string table,
    string dbProduct)
    : base(split, inputClass, conf, conn, dbConfig, cond, fields, table)
{
    // Database manufacturer string.
    dbProductName = dbProduct;
}
/// <summary>
/// Shared output setup: selects <c>DBOutputFormat</c> as the job's output format,
/// turns off reduce speculative execution and records the output table name.
/// </summary>
/// <param name="job">The job to configure.</param>
/// <param name="tableName">The output table name to store in the configuration.</param>
/// <returns>The <c>DBConfiguration</c> wrapping the job's configuration.</returns>
/// <exception cref="System.IO.IOException"/>
private static DBConfiguration SetOutput(Job job, string tableName)
{
    job.SetOutputFormatClass(typeof(DBOutputFormat));
    job.SetReduceSpeculativeExecution(false);

    DBConfiguration outputConf = new DBConfiguration(job.GetConfiguration());
    outputConf.SetOutputTableName(tableName);
    return outputConf;
}
/// <summary>
/// Configures the map side of the job to read its input through a
/// caller-supplied query.
/// </summary>
/// <param name="job">The map-reduce job</param>
/// <param name="inputClass">
/// the class object implementing DBWritable, which is the
/// Java object holding tuple fields.
/// </param>
/// <param name="inputQuery">
/// the input query to select fields. Example :
/// "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
/// </param>
/// <param name="inputCountQuery">
/// the input query that returns
/// the number of records in the table.
/// Example : "SELECT COUNT(f1) FROM Mytable"
/// </param>
/// <seealso cref="DBInputFormat{T}.SetInput(Org.Apache.Hadoop.Mapreduce.Job, System.Type{T}, string, string, string, string[])
/// "/>
public static void SetInput(Job job, Type inputClass, string inputQuery, string inputCountQuery)
{
    job.SetInputFormatClass(typeof(DBInputFormat));

    DBConfiguration inputConf = new DBConfiguration(job.GetConfiguration());
    inputConf.SetInputClass(inputClass);
    inputConf.SetInputQuery(inputQuery);
    inputConf.SetInputCountQuery(inputCountQuery);
}
/// <summary>
/// Returns a Mockito mock of <c>DBConfiguration</c> with fixed answers for the
/// input conditions, the input field names and the input table name.
/// </summary>
/// <returns>The stubbed configuration mock.</returns>
public override DBConfiguration GetDBConf()
{
    DBConfiguration mockedConf = Org.Mockito.Mockito.Mock<DBConfiguration>();
    string[] stubbedFields = new string[] { "field1", "field2" };

    Org.Mockito.Mockito.When(mockedConf.GetInputConditions()).ThenReturn("conditions");
    Org.Mockito.Mockito.When(mockedConf.GetInputFieldNames()).ThenReturn(stubbedFields);
    Org.Mockito.Mockito.When(mockedConf.GetInputTableName()).ThenReturn("table");

    return mockedConf;
}
/// <summary>
/// Builds the SELECT statement used to read records from an Oracle DB.
/// </summary>
/// <remarks>
/// When no input query is configured the statement is assembled from the field
/// names, table name, optional conditions and optional ORDER BY. When the split
/// length is positive, the statement is wrapped in a ROWNUM-based window bounded
/// by the split's start and end.
/// </remarks>
/// <returns>The complete query text.</returns>
protected internal override string GetSelectQuery()
{
    DBConfiguration dbConf = GetDBConf();
    string conditions = GetConditions();
    string tableName = GetTableName();
    string[] fieldNames = GetFieldNames();
    StringBuilder sql = new StringBuilder();

    // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
    if (dbConf.GetInputQuery() != null)
    {
        // Prebuilt query supplied through the configuration.
        sql.Append(dbConf.GetInputQuery());
    }
    else
    {
        sql.Append("SELECT ");
        for (int idx = 0; idx < fieldNames.Length; idx++)
        {
            if (idx > 0)
            {
                sql.Append(", ");
            }
            sql.Append(fieldNames[idx]);
        }

        sql.Append(" FROM ");
        sql.Append(tableName);

        if (!string.IsNullOrEmpty(conditions))
        {
            sql.Append(" WHERE ");
            sql.Append(conditions);
        }

        string orderBy = dbConf.GetInputOrderBy();
        if (!string.IsNullOrEmpty(orderBy))
        {
            sql.Append(" ORDER BY ");
            sql.Append(orderBy);
        }
    }

    try
    {
        DBInputFormat.DBInputSplit split = GetSplit();
        if (split.GetLength() > 0)
        {
            string innerQuery = sql.ToString();
            sql = new StringBuilder();
            sql.Append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ")
               .Append(innerQuery)
               .Append(" ) a WHERE rownum <= ").Append(split.GetEnd())
               .Append(" ) WHERE dbif_rno > ").Append(split.GetStart());
        }
    }
    catch (IOException)
    {
        // ignore, will not throw.
    }

    return sql.ToString();
}
/// <summary>
/// Configures the map side of the job to read its input from a table,
/// described by table name, conditions, ordering and field names.
/// </summary>
/// <param name="job">The map-reduce job</param>
/// <param name="inputClass">
/// the class object implementing DBWritable, which is the
/// Java object holding tuple fields.
/// </param>
/// <param name="tableName">The table to read data from</param>
/// <param name="conditions">
/// The condition which to select data with,
/// eg. '(updated &gt; 20070101 AND length &gt; 0)'
/// </param>
/// <param name="orderBy">the fieldNames in the orderBy clause.</param>
/// <param name="fieldNames">The field names in the table</param>
/// <seealso cref="DBInputFormat{T}.SetInput(Org.Apache.Hadoop.Mapreduce.Job, System.Type{T}, string, string)
/// "/>
public static void SetInput(
    Job job,
    Type inputClass,
    string tableName,
    string conditions,
    string orderBy,
    params string[] fieldNames)
{
    job.SetInputFormatClass(typeof(DBInputFormat));

    DBConfiguration inputConf = new DBConfiguration(job.GetConfiguration());
    inputConf.SetInputClass(inputClass);
    inputConf.SetInputTableName(tableName);
    inputConf.SetInputFieldNames(fieldNames);
    inputConf.SetInputConditions(conditions);
    inputConf.SetInputOrderBy(orderBy);
}
/// <summary>
/// Creates a record reader by storing every argument in the corresponding field.
/// </summary>
/// <param name="split">The InputSplit to read data for</param>
/// <param name="inputClass">Stored in <c>inputClass</c>.</param>
/// <param name="conf">Stored in <c>conf</c>.</param>
/// <param name="conn">Stored in <c>connection</c>.</param>
/// <param name="dbConfig">Stored in <c>dbConf</c>.</param>
/// <param name="cond">Stored in <c>conditions</c>.</param>
/// <param name="fields">Stored in <c>fieldNames</c>.</param>
/// <param name="table">Stored in <c>tableName</c>.</param>
/// <exception cref="Java.Sql.SQLException"></exception>
public DBRecordReader(
    DBInputFormat.DBInputSplit split,
    Type inputClass,
    Configuration conf,
    Connection conn,
    DBConfiguration dbConfig,
    string cond,
    string[] fields,
    string table)
{
    // Parameters that share a name with their field need the explicit "this.".
    this.split = split;
    this.inputClass = inputClass;
    this.conf = conf;

    connection = conn;
    dbConf = dbConfig;
    conditions = cond;
    fieldNames = fields;
    tableName = table;
}
/// <summary>
/// Creates a table with a single DATE column, inserts four rows, runs a
/// data-driven import job over it and checks that the job succeeds and that the
/// reduce output record counter equals 4.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestDateSplits()
{
    string DateTable = "datetable";
    string Col = "foo";

    Statement s = connection.CreateStatement();
    try
    {
        try
        {
            // Delete the table if it already exists.
            s.ExecuteUpdate("DROP TABLE " + DateTable);
        }
        catch (SQLException)
        {
            // Deliberately ignored: the DROP is a best-effort cleanup before the CREATE.
        }

        // Create the table.
        s.ExecuteUpdate("CREATE TABLE " + DateTable + "(" + Col + " DATE)");
        s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-04-01')");
        s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-04-02')");
        s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-05-01')");
        s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2011-04-01')");

        // Commit this tx.
        connection.Commit();
    }
    finally
    {
        // The statement is only needed for the setup above; release it even on failure.
        s.Close();
    }

    Configuration conf = new Configuration();
    conf.Set("fs.defaultFS", "file:///");
    FileSystem fs = FileSystem.GetLocal(conf);
    fs.Delete(new Path(OutDir), true);

    // Now do a dd import.
    Job job = Job.GetInstance(conf);
    job.SetMapperClass(typeof(TestDataDrivenDBInputFormat.ValMapper));
    job.SetReducerClass(typeof(Reducer));
    job.SetMapOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol));
    job.SetMapOutputValueClass(typeof(NullWritable));
    job.SetOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol));
    job.SetOutputValueClass(typeof(NullWritable));
    job.SetNumReduceTasks(1);
    job.GetConfiguration().SetInt("mapreduce.map.tasks", 2);
    FileOutputFormat.SetOutputPath(job, new Path(OutDir));
    DBConfiguration.ConfigureDB(job.GetConfiguration(), DriverClass, DbUrl, null, null);
    DataDrivenDBInputFormat.SetInput(job, typeof(TestDataDrivenDBInputFormat.DateCol), DateTable, null, Col, Col);

    bool ret = job.WaitForCompletion(true);

    // NUnit takes the condition / expected+actual first and the message last.
    NUnit.Framework.Assert.IsTrue(ret, "job failed");

    // Check to see that we imported as much as we thought we did.
    NUnit.Framework.Assert.AreEqual(
        4,
        job.GetCounters().FindCounter(TaskCounter.ReduceOutputRecords).GetValue(),
        "Did not get all the records");
}
/// <summary>
/// Verifies the SQL text produced by <c>OracleDBRecordReader.GetSelectQuery()</c>
/// for a split constructed with (1, 10), two fields, a condition and an ORDER BY.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestOracleDBRecordReader()
{
    DBInputFormat.DBInputSplit split = new DBInputFormat.DBInputSplit(1, 10);
    Configuration conf = new Configuration();
    Connection conn = DriverForTest.GetConnection();

    DBConfiguration dbConf = new DBConfiguration(conf);
    dbConf.SetInputOrderBy("Order");

    string[] columns = new string[] { "f1", "f2" };
    OracleDBRecordReader<DBInputFormat.NullDBWritable> reader =
        new OracleDBRecordReader<DBInputFormat.NullDBWritable>(
            split,
            typeof(DBInputFormat.NullDBWritable),
            conf,
            conn,
            dbConf,
            "condition",
            columns,
            "table");

    string expectedSql = "SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT f1, f2 FROM table WHERE condition ORDER BY Order ) a WHERE rownum <= 10 ) WHERE dbif_rno > 1";
    NUnit.Framework.Assert.AreEqual(expectedSql, reader.GetSelectQuery());
}
/// <summary>
/// <inheritDoc/>
/// Wraps <paramref name="conf"/> in a <c>DBConfiguration</c>, opens a connection,
/// stores the upper-cased database product name, and then caches the input table
/// name, field names and conditions from the configuration.
/// </summary>
/// <param name="conf">The configuration to read the DB settings from.</param>
public virtual void SetConf(Configuration conf)
{
    dbConf = new DBConfiguration(conf);

    try
    {
        connection = CreateConnection();
        dbProductName = StringUtils.ToUpperCase(connection.GetMetaData().GetDatabaseProductName());
    }
    catch (Exception ex)
    {
        throw new RuntimeException(ex);
    }

    tableName = dbConf.GetInputTableName();
    fieldNames = dbConf.GetInputFieldNames();
    conditions = dbConf.GetInputConditions();
}
/// <summary>
/// Creates the Oracle-specific data-driven record reader for one split.
/// </summary>
/// <param name="split">The split the reader is created for.</param>
/// <param name="conf">Configuration passed through to the reader.</param>
/// <returns>A new <c>OracleDataDrivenDBRecordReader</c>.</returns>
/// <exception cref="System.IO.IOException">
/// Thrown when creating the reader raises a <c>SQLException</c>; the original
/// exception is attached as the inner exception.
/// </exception>
protected internal override RecordReader<LongWritable, T> CreateDBRecordReader(DBInputFormat.DBInputSplit split, Configuration conf)
{
    DBConfiguration dbConf = GetDBConf();
    Type inputClass = (Type)(dbConf.GetInputClass());
    try
    {
        // Use the Oracle-specific db reader.
        return new OracleDataDrivenDBRecordReader<T>(
            split,
            inputClass,
            conf,
            CreateConnection(),
            dbConf,
            dbConf.GetInputConditions(),
            dbConf.GetInputFieldNames(),
            dbConf.GetInputTableName());
    }
    catch (SQLException ex)
    {
        // Keep the SQLException as the inner exception so its stack trace is not lost.
        throw new IOException(ex.Message, ex);
    }
}
/// <summary>
/// Exercises both <c>DBOutputFormat.SetOutput</c> overloads (field names and
/// field count) and compares the query built by <c>format.ConstructQuery</c>
/// against the expected text for each case.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestSetOutput()
{
    // Case 1: explicit field names.
    Job job = Job.GetInstance(new Configuration());
    DBOutputFormat.SetOutput(job, "hadoop_output", fieldNames);
    DBConfiguration dbConf = new DBConfiguration(job.GetConfiguration());

    string namedQuery = format.ConstructQuery(dbConf.GetOutputTableName(), dbConf.GetOutputFieldNames());
    NUnit.Framework.Assert.AreEqual(expected, namedQuery);

    // Case 2: only a field count.
    job = Job.GetInstance(new Configuration());
    dbConf = new DBConfiguration(job.GetConfiguration());
    DBOutputFormat.SetOutput(job, "hadoop_output", nullFieldNames.Length);

    NUnit.Framework.Assert.IsNull(dbConf.GetOutputFieldNames());
    NUnit.Framework.Assert.AreEqual(nullFieldNames.Length, dbConf.GetOutputFieldCount());

    string countedQuery = format.ConstructQuery(
        dbConf.GetOutputTableName(),
        new string[dbConf.GetOutputFieldCount()]);
    NUnit.Framework.Assert.AreEqual(nullExpected, countedQuery);
}
/// <summary>
/// Initializes the reduce-part of the job with
/// the appropriate output settings
/// </summary>
/// <remarks>
/// When the first element of <paramref name="fieldNames"/> is non-null the names
/// are stored in the configuration; when it is null only the number of fields is
/// stored, via the field-count overload.
/// </remarks>
/// <param name="job">The job</param>
/// <param name="tableName">The table to insert data into</param>
/// <param name="fieldNames">The field names in the table.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="fieldNames"/> is null or empty.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public static void SetOutput(Job job, string tableName, params string[] fieldNames)
{
    // A null array (possible when a caller passes null explicitly for the params
    // argument) is treated like an empty one instead of failing with a
    // NullReferenceException.
    if (fieldNames == null || fieldNames.Length == 0)
    {
        throw new ArgumentException("Field names must be greater than 0");
    }

    if (fieldNames[0] != null)
    {
        DBConfiguration dbConf = SetOutput(job, tableName);
        dbConf.SetOutputFieldNames(fieldNames);
    }
    else
    {
        // Names are unknown; record only how many fields there are.
        SetOutput(job, tableName, fieldNames.Length);
    }
}
/// <summary>
/// <inheritDoc/>
/// Opens a connection from the task's configuration, prepares the statement
/// produced by <c>ConstructQuery</c> and returns a <c>DBRecordWriter</c> over both.
/// </summary>
/// <param name="context">Task attempt context supplying the configuration.</param>
/// <returns>A record writer bound to the opened connection and prepared statement.</returns>
/// <exception cref="System.IO.IOException">
/// Thrown when the connection, statement or writer cannot be created; the
/// original exception is attached as the inner exception.
/// </exception>
public override RecordWriter<K, V> GetRecordWriter(TaskAttemptContext context)
{
    DBConfiguration dbConf = new DBConfiguration(context.GetConfiguration());
    string tableName = dbConf.GetOutputTableName();
    string[] fieldNames = dbConf.GetOutputFieldNames();
    if (fieldNames == null)
    {
        // Only a field count was configured; use an array of that size.
        fieldNames = new string[dbConf.GetOutputFieldCount()];
    }

    Connection connection = null;
    try
    {
        connection = dbConf.GetConnection();
        PreparedStatement statement = connection.PrepareStatement(ConstructQuery(tableName, fieldNames));
        return new DBOutputFormat.DBRecordWriter(this, connection, statement);
    }
    catch (Exception ex)
    {
        if (connection != null)
        {
            try
            {
                // No writer took ownership of the connection, so release it here.
                connection.Close();
            }
            catch (Exception)
            {
                // Best-effort cleanup; the original failure below is the one to report.
            }
        }

        // Keep the original exception as the inner exception so its stack trace is not lost.
        throw new IOException(ex.Message, ex);
    }
}
/// <summary>
/// Returns the query for selecting the records,
/// subclasses can override this for custom behaviour.
/// </summary>
/// <remarks>
/// The lower and upper clauses of the data-driven split are combined into one
/// condition. Without a configured input query the whole SELECT is generated
/// (an " AS table" alias is added unless the product name starts with "ORACLE").
/// With a configured input query, every occurrence of
/// <c>DataDrivenDBInputFormat.SubstituteToken</c> is replaced by that condition;
/// an error is logged when the token is absent.
/// </remarks>
/// <returns>The complete query text.</returns>
protected internal override string GetSelectQuery()
{
    StringBuilder query = new StringBuilder();
    DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
        (DataDrivenDBInputFormat.DataDrivenDBInputSplit)GetSplit();
    DBConfiguration dbConf = GetDBConf();
    string[] fieldNames = GetFieldNames();
    string tableName = GetTableName();
    string conditions = GetConditions();

    // Build the WHERE clauses associated with the data split first.
    // We need them in both branches of this function.
    StringBuilder clauseBuilder = new StringBuilder();
    clauseBuilder.Append("( ").Append(dataSplit.GetLowerClause());
    clauseBuilder.Append(" ) AND ( ").Append(dataSplit.GetUpperClause());
    clauseBuilder.Append(" )");
    string conditionClauses = clauseBuilder.ToString();

    string inputQuery = dbConf.GetInputQuery();
    if (inputQuery == null)
    {
        // We need to generate the entire query.
        query.Append("SELECT ");
        for (int i = 0; i < fieldNames.Length; i++)
        {
            query.Append(fieldNames[i]);
            if (i != fieldNames.Length - 1)
            {
                query.Append(", ");
            }
        }

        query.Append(" FROM ").Append(tableName);

        // Ordinal comparison: the product name is an identifier, not linguistic text.
        if (!dbProductName.StartsWith("ORACLE", StringComparison.Ordinal))
        {
            // Seems to be necessary for hsqldb? Oracle explicitly does *not*
            // use this clause.
            query.Append(" AS ").Append(tableName);
        }

        query.Append(" WHERE ");
        if (conditions != null && conditions.Length > 0)
        {
            // Put the user's conditions first.
            query.Append("( ").Append(conditions).Append(" ) AND ");
        }

        // Now append the conditions associated with our split.
        query.Append(conditionClauses);
    }
    else
    {
        // User provided the query. We replace the special token with our WHERE clause.
        // Contains() is ordinal, matching the ordinal semantics of Replace() below,
        // so the warning and the substitution cannot disagree.
        if (!inputQuery.Contains(DataDrivenDBInputFormat.SubstituteToken))
        {
            Log.Error("Could not find the clause substitution token " + DataDrivenDBInputFormat.SubstituteToken
                + " in the query: [" + inputQuery + "]. Parallel splits may not work correctly.");
        }

        query.Append(inputQuery.Replace(DataDrivenDBInputFormat.SubstituteToken, conditionClauses));
    }

    string result = query.ToString();
    Log.Debug("Using query: " + result);
    return result;
}
/// <summary>
/// Creates a MySQL record reader. Every argument is passed to the base
/// constructor unchanged; no additional initialization is performed here.
/// </summary>
/// <param name="split">Input split forwarded to the base reader.</param>
/// <param name="inputClass">Input class forwarded to the base reader.</param>
/// <param name="conf">Configuration forwarded to the base reader.</param>
/// <param name="conn">Connection forwarded to the base reader.</param>
/// <param name="dbConfig">DB configuration forwarded to the base reader.</param>
/// <param name="cond">Conditions string forwarded to the base reader.</param>
/// <param name="fields">Field names forwarded to the base reader.</param>
/// <param name="table">Table name forwarded to the base reader.</param>
/// <exception cref="Java.Sql.SQLException"/>
public MySQLDBRecordReader(
    DBInputFormat.DBInputSplit split,
    Type inputClass,
    Configuration conf,
    Connection conn,
    DBConfiguration dbConfig,
    string cond,
    string[] fields,
    string table)
    : base(split, inputClass, conf, conn, dbConfig, cond, fields, table)
{
}
/// <summary>
/// Initializes the reduce-part of the job
/// with the appropriate output settings
/// </summary>
/// <remarks>
/// Delegates to the two-argument <c>SetOutput</c> helper and then stores
/// <paramref name="fieldCount"/> on the configuration it returns.
/// </remarks>
/// <param name="job">The job</param>
/// <param name="tableName">The table to insert data into</param>
/// <param name="fieldCount">the number of fields in the table.</param>
/// <exception cref="System.IO.IOException"/>
public static void SetOutput(Job job, string tableName, int fieldCount)
{
    SetOutput(job, tableName).SetOutputFieldCount(fieldCount);
}