Ejemplo n.º 1
0
 /// <summary>
 /// Builds the reader by forwarding every argument to the base constructor and then
 /// calling <c>SetSessionTimeZone</c> with the configuration and connection.
 /// </summary>
 /// <param name="split">The input split handed to the base reader.</param>
 /// <param name="inputClass">The record type handed to the base reader.</param>
 /// <param name="conf">Configuration, also passed to <c>SetSessionTimeZone</c>.</param>
 /// <param name="conn">Connection, also passed to <c>SetSessionTimeZone</c>.</param>
 /// <param name="dbConfig">The DB configuration handed to the base reader.</param>
 /// <param name="cond">The conditions handed to the base reader.</param>
 /// <param name="fields">The field names handed to the base reader.</param>
 /// <param name="table">The table name handed to the base reader.</param>
 /// <exception cref="Java.Sql.SQLException"/>
 public OracleDBRecordReader(
     DBInputFormat.DBInputSplit split,
     Type inputClass,
     Configuration conf,
     Connection conn,
     DBConfiguration dbConfig,
     string cond,
     string[] fields,
     string table)
     : base(split, inputClass, conf, conn, dbConfig, cond, fields, table)
 {
     SetSessionTimeZone(conf, conn);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the record reader for one split, picking the implementation from the
        /// database product name returned by <c>GetDBProductName()</c>.
        /// </summary>
        /// <param name="split">The split the reader is created for.</param>
        /// <param name="conf">The configuration handed to the reader.</param>
        /// <returns>
        /// A <c>MySQLDataDrivenDBRecordReader</c> when the product name starts with "MYSQL",
        /// otherwise a generic <c>DataDrivenDBRecordReader</c>.
        /// </returns>
        /// <exception cref="System.IO.IOException">
        /// Wraps any <c>SQLException</c> raised while creating the connection or the reader;
        /// the original exception is available as the inner exception.
        /// </exception>
        protected internal override RecordReader <LongWritable, T> CreateDBRecordReader(DBInputFormat.DBInputSplit
                                                                                        split, Configuration conf)
        {
            DBConfiguration dbConf        = GetDBConf();
            Type            inputClass    = (Type)(dbConf.GetInputClass());
            string          dbProductName = GetDBProductName();

            Log.Debug("Creating db record reader for db product: " + dbProductName);
            try
            {
                // Use the database product name to determine the appropriate record reader.
                // Ordinal comparison: this is an identifier check, not linguistic text.
                if (dbProductName.StartsWith("MYSQL", System.StringComparison.Ordinal))
                {
                    // Use the MySQL-specific db reader.
                    return new MySQLDataDrivenDBRecordReader <T>(
                        split,
                        inputClass,
                        conf,
                        CreateConnection(),
                        dbConf,
                        dbConf.GetInputConditions(),
                        dbConf.GetInputFieldNames(),
                        dbConf.GetInputTableName());
                }

                // Generic reader.
                return new DataDrivenDBRecordReader <T>(
                    split,
                    inputClass,
                    conf,
                    CreateConnection(),
                    dbConf,
                    dbConf.GetInputConditions(),
                    dbConf.GetInputFieldNames(),
                    dbConf.GetInputTableName(),
                    dbProductName);
            }
            catch (SQLException ex)
            {
                // Keep the SQL exception as the inner exception so its stack trace and
                // details are not lost when it is translated to an IOException.
                throw new IOException(ex.Message, ex);
            }
        }
 /// <summary>
 /// Builds the reader by forwarding the arguments to the base constructor with the
 /// product name fixed to "ORACLE", then initializes the connection's session time zone.
 /// </summary>
 /// <param name="split">The input split handed to the base reader.</param>
 /// <param name="inputClass">The record type handed to the base reader.</param>
 /// <param name="conf">Configuration, also used for the time zone setup.</param>
 /// <param name="conn">Connection, also used for the time zone setup.</param>
 /// <param name="dbConfig">The DB configuration handed to the base reader.</param>
 /// <param name="cond">The conditions handed to the base reader.</param>
 /// <param name="fields">The field names handed to the base reader.</param>
 /// <param name="table">The table name handed to the base reader.</param>
 /// <exception cref="Java.Sql.SQLException"/>
 public OracleDataDrivenDBRecordReader(
     DBInputFormat.DBInputSplit split,
     Type inputClass,
     Configuration conf,
     Connection conn,
     DBConfiguration dbConfig,
     string cond,
     string[] fields,
     string table)
     : base(split, inputClass, conf, conn, dbConfig, cond, fields, table, "ORACLE")
 {
     // Oracle requires the time zone used by the connection to be initialized.
     OracleDBRecordReader.SetSessionTimeZone(conf, conn);
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Builds the reader by forwarding the common arguments to the base constructor and
 /// remembering the database product string.
 /// </summary>
 /// <param name="split">The InputSplit to read data for</param>
 /// <param name="inputClass">The record type handed to the base reader.</param>
 /// <param name="conf">The configuration handed to the base reader.</param>
 /// <param name="conn">The connection handed to the base reader.</param>
 /// <param name="dbConfig">The DB configuration handed to the base reader.</param>
 /// <param name="cond">The conditions handed to the base reader.</param>
 /// <param name="fields">The field names handed to the base reader.</param>
 /// <param name="table">The table name handed to the base reader.</param>
 /// <param name="dbProduct">The database manufacturer string stored on this reader.</param>
 /// <exception cref="Java.Sql.SQLException"></exception>
 public DataDrivenDBRecordReader(
     DBInputFormat.DBInputSplit split,
     Type inputClass,
     Configuration conf,
     Connection conn,
     DBConfiguration dbConfig,
     string cond,
     string[] fields,
     string table,
     string dbProduct)
     : base(split, inputClass, conf, conn, dbConfig, cond, fields, table)
 {
     // Database manufacturer string.
     this.dbProductName = dbProduct;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Applies the output settings shared by the public <c>SetOutput</c> overloads:
        /// sets the output format class, turns off reduce speculative execution and
        /// stores the output table name.
        /// </summary>
        /// <param name="job">The job to configure.</param>
        /// <param name="tableName">The output table name to store.</param>
        /// <returns>The <c>DBConfiguration</c> wrapping the job's configuration.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static DBConfiguration SetOutput(Job job, string tableName)
        {
            job.SetOutputFormatClass(typeof(DBOutputFormat));
            job.SetReduceSpeculativeExecution(false);

            DBConfiguration outputConf = new DBConfiguration(job.GetConfiguration());
            outputConf.SetOutputTableName(tableName);

            return outputConf;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Initializes the map-part of the job with the appropriate input settings,
        /// using a caller-supplied select query and count query.
        /// </summary>
        /// <param name="job">The map-reduce job</param>
        /// <param name="inputClass">
        /// the class object implementing DBWritable, which is the
        /// object holding tuple fields.
        /// </param>
        /// <param name="inputQuery">
        /// the input query to select fields. Example :
        /// "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
        /// </param>
        /// <param name="inputCountQuery">
        /// the input query that returns the number of records in the table.
        /// Example : "SELECT COUNT(f1) FROM Mytable"
        /// </param>
        /// <seealso cref="DBInputFormat{T}.SetInput(Org.Apache.Hadoop.Mapreduce.Job, System.Type{T}, string, string, string, string[])"/>
        public static void SetInput(Job job, Type inputClass, string inputQuery, string inputCountQuery)
        {
            job.SetInputFormatClass(typeof(DBInputFormat));

            DBConfiguration inputConf = new DBConfiguration(job.GetConfiguration());
            inputConf.SetInputClass(inputClass);
            inputConf.SetInputQuery(inputQuery);
            inputConf.SetInputCountQuery(inputCountQuery);
        }
Ejemplo n.º 7
0
            /// <summary>
            /// Returns a mocked <c>DBConfiguration</c> with fixed input conditions,
            /// field names and table name.
            /// </summary>
            public override DBConfiguration GetDBConf()
            {
                string[] stubbedFields = { "field1", "field2" };
                DBConfiguration mockConf = Org.Mockito.Mockito.Mock <DBConfiguration>();

                Org.Mockito.Mockito.When(mockConf.GetInputConditions()).ThenReturn("conditions");
                Org.Mockito.Mockito.When(mockConf.GetInputFieldNames()).ThenReturn(stubbedFields);
                Org.Mockito.Mockito.When(mockConf.GetInputTableName()).ThenReturn("table");

                return mockConf;
            }
Ejemplo n.º 8
0
        /// <summary>Returns the query for selecting the records from an Oracle DB.</summary>
        /// <remarks>
        /// When no input query is configured, a <c>SELECT ... FROM ...</c> statement is built
        /// from the field names, table name, optional conditions and optional ORDER BY.
        /// Otherwise the configured query is used as-is. If the split has a positive length,
        /// the statement is wrapped in a ROWNUM-based sub-select bounded by the split's
        /// start and end, because Oracle uses rownum instead of LIMIT/OFFSET.
        /// </remarks>
        /// <returns>The SQL text to execute for this reader's split.</returns>
        protected internal override string GetSelectQuery()
        {
            StringBuilder   sql        = new StringBuilder();
            DBConfiguration dbConf     = GetDBConf();
            string          conditions = GetConditions();
            string          tableName  = GetTableName();
            string[]        fieldNames = GetFieldNames();

            if (dbConf.GetInputQuery() != null)
            {
                // A prebuilt query was configured; use it verbatim.
                sql.Append(dbConf.GetInputQuery());
            }
            else
            {
                // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
                sql.Append("SELECT ");
                for (int idx = 0; idx < fieldNames.Length; idx++)
                {
                    if (idx > 0)
                    {
                        sql.Append(", ");
                    }
                    sql.Append(fieldNames[idx]);
                }
                sql.Append(" FROM ");
                sql.Append(tableName);
                if (!string.IsNullOrEmpty(conditions))
                {
                    sql.Append(" WHERE ");
                    sql.Append(conditions);
                }
                string orderBy = dbConf.GetInputOrderBy();
                if (!string.IsNullOrEmpty(orderBy))
                {
                    sql.Append(" ORDER BY ");
                    sql.Append(orderBy);
                }
            }

            try
            {
                DBInputFormat.DBInputSplit split = GetSplit();
                if (split.GetLength() > 0)
                {
                    // Wrap the statement built so far in the row-number window for this split.
                    string innerSql = sql.ToString();
                    sql = new StringBuilder();
                    sql.Append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
                    sql.Append(innerSql);
                    sql.Append(" ) a WHERE rownum <= ").Append(split.GetEnd());
                    sql.Append(" ) WHERE dbif_rno > ").Append(split.GetStart());
                }
            }
            catch (IOException)
            {
                // ignore, will not throw.
            }
            return sql.ToString();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Initializes the map-part of the job with the appropriate input settings,
        /// describing the input as a table with conditions, ordering and field names.
        /// </summary>
        /// <param name="job">The map-reduce job</param>
        /// <param name="inputClass">
        /// the class object implementing DBWritable, which is the
        /// object holding tuple fields.
        /// </param>
        /// <param name="tableName">The table to read data from</param>
        /// <param name="conditions">
        /// The condition which to select data with,
        /// eg. '(updated &gt; 20070101 AND length &gt; 0)'
        /// </param>
        /// <param name="orderBy">the fieldNames in the orderBy clause.</param>
        /// <param name="fieldNames">The field names in the table</param>
        /// <seealso cref="DBInputFormat{T}.SetInput(Org.Apache.Hadoop.Mapreduce.Job, System.Type{T}, string, string)"/>
        public static void SetInput(
            Job job,
            Type inputClass,
            string tableName,
            string conditions,
            string orderBy,
            params string[] fieldNames)
        {
            job.SetInputFormatClass(typeof(DBInputFormat));

            DBConfiguration inputConf = new DBConfiguration(job.GetConfiguration());
            inputConf.SetInputClass(inputClass);
            inputConf.SetInputTableName(tableName);
            inputConf.SetInputFieldNames(fieldNames);
            inputConf.SetInputConditions(conditions);
            inputConf.SetInputOrderBy(orderBy);
        }
Ejemplo n.º 10
0
 /// <summary>Stores the supplied arguments on the reader; no other work is done here.</summary>
 /// <param name="split">The InputSplit to read data for</param>
 /// <param name="inputClass">Stored as the reader's input class.</param>
 /// <param name="conf">Stored as the reader's configuration.</param>
 /// <param name="conn">Stored as the reader's connection.</param>
 /// <param name="dbConfig">Stored as the reader's DB configuration.</param>
 /// <param name="cond">Stored as the reader's conditions.</param>
 /// <param name="fields">Stored as the reader's field names.</param>
 /// <param name="table">Stored as the reader's table name.</param>
 /// <exception cref="Java.Sql.SQLException"></exception>
 public DBRecordReader(
     DBInputFormat.DBInputSplit split,
     Type inputClass,
     Configuration conf,
     Connection conn,
     DBConfiguration dbConfig,
     string cond,
     string[] fields,
     string table)
 {
     // What to read and into which type.
     this.split      = split;
     this.inputClass = inputClass;

     // Where to read it from.
     this.conf       = conf;
     this.dbConf     = dbConfig;
     this.connection = conn;

     // Pieces used to build the select statement.
     this.tableName  = table;
     this.fieldNames = fields;
     this.conditions = cond;
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Creates a one-column DATE table with four rows, runs a data-driven import job
        /// over it with two map tasks, and checks that the job succeeds and that four
        /// records reach the reducer output.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestDateSplits()
        {
            Statement s         = connection.CreateStatement();
            string    DateTable = "datetable";
            string    Col       = "foo";

            try
            {
                // delete the table if it already exists.
                s.ExecuteUpdate("DROP TABLE " + DateTable);
            }
            catch (SQLException)
            {
                // Deliberately ignored: the DROP is only a best-effort cleanup.
            }
            // Create the table.
            s.ExecuteUpdate("CREATE TABLE " + DateTable + "(" + Col + " DATE)");
            s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-04-01')");
            s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-04-02')");
            s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-05-01')");
            s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2011-04-01')");
            // commit this tx.
            connection.Commit();
            Configuration conf = new Configuration();

            conf.Set("fs.defaultFS", "file:///");
            FileSystem fs = FileSystem.GetLocal(conf);

            fs.Delete(new Path(OutDir), true);
            // now do a dd import
            Job job = Job.GetInstance(conf);

            job.SetMapperClass(typeof(TestDataDrivenDBInputFormat.ValMapper));
            job.SetReducerClass(typeof(Reducer));
            job.SetMapOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol));
            job.SetMapOutputValueClass(typeof(NullWritable));
            job.SetOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol));
            job.SetOutputValueClass(typeof(NullWritable));
            job.SetNumReduceTasks(1);
            job.GetConfiguration().SetInt("mapreduce.map.tasks", 2);
            FileOutputFormat.SetOutputPath(job, new Path(OutDir));
            DBConfiguration.ConfigureDB(job.GetConfiguration(), DriverClass, DbUrl, null, null);
            DataDrivenDBInputFormat.SetInput(job, typeof(TestDataDrivenDBInputFormat.DateCol),
                                             DateTable, null, Col, Col);
            bool ret = job.WaitForCompletion(true);

            // NUnit takes the value under test first and the failure message last
            // (the original call used the JUnit order, message first).
            NUnit.Framework.Assert.IsTrue(ret, "job failed");
            // Check to see that we imported as much as we thought we did.
            NUnit.Framework.Assert.AreEqual(
                4,
                job.GetCounters().FindCounter(TaskCounter.ReduceOutputRecords).GetValue(),
                "Did not get all the records");
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Checks the SQL text generated by <c>OracleDBRecordReader.GetSelectQuery()</c>
        /// for a split from 1 to 10 with two fields, a condition and an ORDER BY.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestOracleDBRecordReader()
        {
            DBInputFormat.DBInputSplit split      = new DBInputFormat.DBInputSplit(1, 10);
            Configuration              conf       = new Configuration();
            Connection                 connection = DriverForTest.GetConnection();
            DBConfiguration            dbConf     = new DBConfiguration(conf);

            dbConf.SetInputOrderBy("Order");
            string[] columns = { "f1", "f2" };

            OracleDBRecordReader <DBInputFormat.NullDBWritable> reader =
                new OracleDBRecordReader <DBInputFormat.NullDBWritable>(
                    split,
                    typeof(DBInputFormat.NullDBWritable),
                    conf,
                    connection,
                    dbConf,
                    "condition",
                    columns,
                    "table");

            string expectedSql = "SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT f1, f2 FROM table WHERE condition ORDER BY Order ) a WHERE rownum <= 10 ) WHERE dbif_rno > 1";

            NUnit.Framework.Assert.AreEqual(expectedSql, reader.GetSelectQuery());
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Wraps <paramref name="conf"/> in a <c>DBConfiguration</c>, opens a connection, records
 /// the database product name (passed through <c>StringUtils.ToUpperCase</c>), and then
 /// caches the configured input table name, field names and conditions.
 /// </summary>
 /// <param name="conf">The configuration to read the DB settings from.</param>
 public virtual void SetConf(Configuration conf)
 {
     dbConf = new DBConfiguration(conf);

     try
     {
         this.connection = CreateConnection();
         DatabaseMetaData metadata = connection.GetMetaData();
         this.dbProductName = StringUtils.ToUpperCase(metadata.GetDatabaseProductName());
     }
     catch (Exception failure)
     {
         // Any failure while connecting or reading metadata is rethrown unchecked.
         throw new RuntimeException(failure);
     }

     // Cached only after the connection has been established successfully.
     tableName  = dbConf.GetInputTableName();
     fieldNames = dbConf.GetInputFieldNames();
     conditions = dbConf.GetInputConditions();
 }
        /// <summary>
        /// Creates the Oracle-specific record reader for one split, using a new connection
        /// and the input conditions, field names and table name from the DB configuration.
        /// </summary>
        /// <param name="split">The split the reader is created for.</param>
        /// <param name="conf">The configuration handed to the reader.</param>
        /// <returns>A new <c>OracleDataDrivenDBRecordReader</c>.</returns>
        /// <exception cref="System.IO.IOException">
        /// Wraps any <c>SQLException</c> raised while creating the connection or the reader;
        /// the original exception is available as the inner exception.
        /// </exception>
        protected internal override RecordReader <LongWritable, T> CreateDBRecordReader(DBInputFormat.DBInputSplit
                                                                                        split, Configuration conf)
        {
            DBConfiguration dbConf     = GetDBConf();
            Type            inputClass = (Type)(dbConf.GetInputClass());

            try
            {
                // Use Oracle-specific db reader
                return new OracleDataDrivenDBRecordReader <T>(
                    split,
                    inputClass,
                    conf,
                    CreateConnection(),
                    dbConf,
                    dbConf.GetInputConditions(),
                    dbConf.GetInputFieldNames(),
                    dbConf.GetInputTableName());
            }
            catch (SQLException ex)
            {
                // Keep the SQL exception as the inner exception so its stack trace and
                // details are not lost when it is translated to an IOException.
                throw new IOException(ex.Message, ex);
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Exercises both <c>DBOutputFormat.SetOutput</c> overloads: first with explicit
        /// field names, then with only a field count, and compares the constructed query
        /// against the expected text in each case.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestSetOutput()
        {
            // Case 1: output configured with explicit field names.
            Job namedJob = Job.GetInstance(new Configuration());

            DBOutputFormat.SetOutput(namedJob, "hadoop_output", fieldNames);
            DBConfiguration namedConf  = new DBConfiguration(namedJob.GetConfiguration());
            string          namedQuery = format.ConstructQuery(
                namedConf.GetOutputTableName(),
                namedConf.GetOutputFieldNames());

            NUnit.Framework.Assert.AreEqual(expected, namedQuery);

            // Case 2: output configured with a field count only.
            Job             countedJob  = Job.GetInstance(new Configuration());
            DBConfiguration countedConf = new DBConfiguration(countedJob.GetConfiguration());

            DBOutputFormat.SetOutput(countedJob, "hadoop_output", nullFieldNames.Length);
            NUnit.Framework.Assert.IsNull(countedConf.GetOutputFieldNames());
            NUnit.Framework.Assert.AreEqual(nullFieldNames.Length, countedConf.GetOutputFieldCount());

            string countedQuery = format.ConstructQuery(
                countedConf.GetOutputTableName(),
                new string[countedConf.GetOutputFieldCount()]);

            NUnit.Framework.Assert.AreEqual(nullExpected, countedQuery);
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Initializes the reduce-part of the job with
 /// the appropriate output settings
 /// </summary>
 /// <remarks>
 /// When the first field name is non-null the names are stored in the configuration.
 /// When the first field name is null only the number of fields is stored, by
 /// delegating to the field-count overload.
 /// </remarks>
 /// <param name="job">The job</param>
 /// <param name="tableName">The table to insert data into</param>
 /// <param name="fieldNames">The field names in the table.</param>
 /// <exception cref="System.ArgumentException">
 /// <paramref name="fieldNames"/> is null or empty.
 /// </exception>
 /// <exception cref="System.IO.IOException"/>
 public static void SetOutput(Job job, string tableName, params string[] fieldNames)
 {
     // A null array (possible with an explicit null argument) is treated like an
     // empty one instead of failing with a NullReferenceException.
     if (fieldNames == null || fieldNames.Length == 0)
     {
         throw new ArgumentException("Field names must be greater than 0");
     }

     if (fieldNames[0] != null)
     {
         DBConfiguration dbConf = SetOutput(job, tableName);
         dbConf.SetOutputFieldNames(fieldNames);
     }
     else
     {
         // Names are unknown; record only how many fields there are.
         SetOutput(job, tableName, fieldNames.Length);
     }
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Creates a record writer backed by a prepared statement built from the configured
        /// output table name and field names.
        /// </summary>
        /// <remarks>
        /// When no output field names are configured, an array sized by the configured
        /// output field count is used in their place.
        /// </remarks>
        /// <param name="context">The task attempt context supplying the configuration.</param>
        /// <returns>A new <c>DBOutputFormat.DBRecordWriter</c>.</returns>
        /// <exception cref="System.IO.IOException">
        /// Wraps any exception raised while connecting or preparing the statement;
        /// the original exception is available as the inner exception.
        /// </exception>
        public override RecordWriter <K, V> GetRecordWriter(TaskAttemptContext context)
        {
            DBConfiguration dbConf    = new DBConfiguration(context.GetConfiguration());
            string          tableName = dbConf.GetOutputTableName();

            string[] fieldNames = dbConf.GetOutputFieldNames();
            if (fieldNames == null)
            {
                // Only the number of fields is known; use placeholders of that count.
                fieldNames = new string[dbConf.GetOutputFieldCount()];
            }
            try
            {
                Connection        connection = dbConf.GetConnection();
                PreparedStatement statement  = connection.PrepareStatement(ConstructQuery(tableName, fieldNames));
                return new DBOutputFormat.DBRecordWriter(this, connection, statement);
            }
            catch (Exception ex)
            {
                // Keep the original exception as the inner exception so its stack trace
                // and details are not lost when it is translated to an IOException.
                throw new IOException(ex.Message, ex);
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Returns the query for selecting the records,
        /// subclasses can override this for custom behaviour.
        /// </summary>
        /// <remarks>
        /// The split's lower and upper clauses are combined into one bounding condition.
        /// Without a configured input query, a full SELECT over the field names and table is
        /// generated, with any user conditions placed before the split condition. With a
        /// configured input query, every occurrence of
        /// <c>DataDrivenDBInputFormat.SubstituteToken</c> is replaced by the split condition;
        /// an error is logged if the token is absent.
        /// </remarks>
        /// <returns>The SQL text to execute for this reader's split.</returns>
        protected internal override string GetSelectQuery()
        {
            StringBuilder query = new StringBuilder();

            DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
                (DataDrivenDBInputFormat.DataDrivenDBInputSplit)GetSplit();
            DBConfiguration dbConf = GetDBConf();

            string[] fieldNames = GetFieldNames();
            string   tableName  = GetTableName();
            string   conditions = GetConditions();

            // Build the WHERE clauses associated with the data split first.
            // We need them in both branches of this function.
            StringBuilder conditionClauses = new StringBuilder();

            conditionClauses.Append("( ").Append(dataSplit.GetLowerClause());
            conditionClauses.Append(" ) AND ( ").Append(dataSplit.GetUpperClause());
            conditionClauses.Append(" )");
            string splitCondition = conditionClauses.ToString();

            string inputQuery = dbConf.GetInputQuery();
            if (inputQuery == null)
            {
                // We need to generate the entire query.
                query.Append("SELECT ");
                for (int i = 0; i < fieldNames.Length; i++)
                {
                    query.Append(fieldNames[i]);
                    if (i != fieldNames.Length - 1)
                    {
                        query.Append(", ");
                    }
                }
                query.Append(" FROM ").Append(tableName);
                // Ordinal comparison: the product name is an identifier, not linguistic text.
                if (!dbProductName.StartsWith("ORACLE", System.StringComparison.Ordinal))
                {
                    // Seems to be necessary for hsqldb? Oracle explicitly does *not*
                    // use this clause.
                    query.Append(" AS ").Append(tableName);
                }
                query.Append(" WHERE ");
                if (!string.IsNullOrEmpty(conditions))
                {
                    // Put the user's conditions first.
                    query.Append("( ").Append(conditions).Append(" ) AND ");
                }
                // Now append the conditions associated with our split.
                query.Append(splitCondition);
            }
            else
            {
                // User provided the query. We replace the special token with our WHERE clause.
                // Contains() is an ordinal search, matching the ordinal Replace() below, so the
                // warning is logged exactly when no replacement will happen.
                if (!inputQuery.Contains(DataDrivenDBInputFormat.SubstituteToken))
                {
                    Log.Error("Could not find the clause substitution token " + DataDrivenDBInputFormat
                              .SubstituteToken + " in the query: [" + inputQuery + "]. Parallel splits may not work correctly."
                              );
                }
                query.Append(inputQuery.Replace(DataDrivenDBInputFormat.SubstituteToken, splitCondition));
            }

            string sql = query.ToString();
            Log.Debug("Using query: " + sql);
            return sql;
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Builds the reader by forwarding every argument unchanged to the base constructor.
 /// </summary>
 /// <param name="split">The input split handed to the base reader.</param>
 /// <param name="inputClass">The record type handed to the base reader.</param>
 /// <param name="conf">The configuration handed to the base reader.</param>
 /// <param name="conn">The connection handed to the base reader.</param>
 /// <param name="dbConfig">The DB configuration handed to the base reader.</param>
 /// <param name="cond">The conditions handed to the base reader.</param>
 /// <param name="fields">The field names handed to the base reader.</param>
 /// <param name="table">The table name handed to the base reader.</param>
 /// <exception cref="Java.Sql.SQLException"/>
 public MySQLDBRecordReader(
     DBInputFormat.DBInputSplit split,
     Type inputClass,
     Configuration conf,
     Connection conn,
     DBConfiguration dbConfig,
     string cond,
     string[] fields,
     string table)
     : base(split, inputClass, conf, conn, dbConfig, cond, fields, table)
 {
     // No additional setup is performed here.
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Initializes the reduce-part of the job with the appropriate output settings
        /// when only the number of fields is known.
        /// </summary>
        /// <param name="job">The job</param>
        /// <param name="tableName">The table to insert data into</param>
        /// <param name="fieldCount">the number of fields in the table.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void SetOutput(Job job, string tableName, int fieldCount)
        {
            // Apply the shared output settings, then record the field count on the result.
            SetOutput(job, tableName).SetOutputFieldCount(fieldCount);
        }