Example #1
0
 /// <summary>
 /// Configures the job to use <c>SequenceFileInputFilter.PercentFilter</c> with a
 /// fixed frequency, then, for a series of randomly stepped record counts, writes a
 /// sequence file and checks that the number of records read back through the filter
 /// equals the record count divided by the frequency, rounded up.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestPercentFilter()
 {
     // Single source of truth for the frequency used in the log line, the filter
     // configuration and the expected-count computation below.
     const int frequency = 1000;

     Log.Info("Testing Percent Filter with frequency: " + frequency);
     // set the filter class
     SequenceFileInputFilter.SetFilterClass(job, typeof(SequenceFileInputFilter.PercentFilter));
     SequenceFileInputFilter.PercentFilter.SetFrequency(job.GetConfiguration(), frequency);
     // clean input dir
     fs.Delete(inDir, true);
     // for a variety of lengths (random step of at least 1 keeps the loop advancing)
     for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
     {
         Log.Info("******Number of records: " + length);
         CreateSequenceFile(length);
         int count = CountRecords(1);
         Log.Info("Accepted " + count + " records");

         // Expected value is ceil(length / frequency): integer division, plus one
         // when there is a remainder.
         int expectedCount = length / frequency;
         if (length % frequency != 0)
         {
             expectedCount++;
         }

         // NUnit's signature is AreEqual(expected, actual); keeping that order makes
         // failure messages report the right value as "expected".
         NUnit.Framework.Assert.AreEqual(expectedCount, count);
     }
     // clean up
     fs.Delete(inDir, true);
 }
Example #2
0
 /// <summary>
 /// Configures the job to use <c>SequenceFileInputFilter.MD5Filter</c> with a
 /// frequency of 1000, then, for a series of randomly stepped record counts, writes
 /// a sequence file and logs how many records were read back through the filter.
 /// The accepted count is only logged; this test makes no assertion on it.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestMD5Filter()
 {
     Log.Info("Testing MD5 Filter with frequency: 1000");

     // Select the MD5 filter and configure its frequency on the job.
     SequenceFileInputFilter.SetFilterClass(job, typeof(SequenceFileInputFilter.MD5Filter));
     SequenceFileInputFilter.MD5Filter.SetFrequency(job.GetConfiguration(), 1000);

     // Start from an empty input directory.
     fs.Delete(inDir, true);

     // Walk the record count upward in random increments of at least one.
     int recordCount = 0;
     while (recordCount < MaxLength)
     {
         Log.Info("******Number of records: " + recordCount);
         CreateSequenceFile(recordCount);

         // Passing 0 lets CountRecords choose the number of splits itself.
         int accepted = CountRecords(0);
         Log.Info("Accepted " + accepted + " records");

         recordCount += random.Next(MaxLength / 10) + 1;
     }

     // Remove whatever the test wrote.
     fs.Delete(inDir, true);
 }
Example #3
0
        /// <summary>
        /// Reads the input file through a <c>SequenceFileInputFilter</c>, split by split,
        /// and returns how many records the record readers produced in total.
        /// </summary>
        /// <param name="numSplits">
        /// Divisor applied to the input file length to obtain the maximum split size.
        /// A value of 0 means a random positive divisor is chosen instead.
        /// </param>
        /// <returns>The number of records read across all splits.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private int CountRecords(int numSplits)
        {
            InputFormat<Text, BytesWritable> format = new SequenceFileInputFilter<Text, BytesWritable>();

            // Zero is the caller's way of asking for a randomly chosen divisor (always >= 1).
            int splitDivisor = numSplits != 0
                ? numSplits
                : random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;

            // Cap the split size at (file length / divisor).
            FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(inFile).GetLen() / splitDivisor);

            TaskAttemptContext context =
                MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());

            // Read every split to the end, tallying the records seen.
            int accepted = 0;
            foreach (InputSplit split in format.GetSplits(job))
            {
                RecordReader<Text, BytesWritable> reader = format.CreateRecordReader(split, context);
                MapContext<Text, BytesWritable, Text, BytesWritable> mapContext =
                    new MapContextImpl<Text, BytesWritable, Text, BytesWritable>(
                        job.GetConfiguration(),
                        context.GetTaskAttemptID(),
                        reader,
                        null,
                        null,
                        MapReduceTestUtil.CreateDummyReporter(),
                        split);
                reader.Initialize(split, mapContext);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        Log.Info("Accept record " + reader.GetCurrentKey().ToString());
                        accepted++;
                    }
                }
                finally
                {
                    // Close the reader whether or not reading succeeded.
                    reader.Close();
                }
            }

            return accepted;
        }
Example #4
0
 /// <summary>
 /// Configures the job to use <c>SequenceFileInputFilter.RegexFilter</c> with the
 /// pattern <c>\A10*</c>, then, for a series of randomly stepped record counts
 /// starting at 1, writes a sequence file and checks that the number of records read
 /// back through the filter equals the number of powers of ten (1, 10, 100, ...)
 /// that do not exceed the record count.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestRegexFilter()
 {
     // set the filter class
     Log.Info("Testing Regex Filter with patter: \\A10*");
     SequenceFileInputFilter.SetFilterClass(job, typeof(SequenceFileInputFilter.RegexFilter));
     SequenceFileInputFilter.RegexFilter.SetPattern(job.GetConfiguration(), "\\A10*");
     // clean input dir
     fs.Delete(inDir, true);
     // for a variety of lengths (random step of at least 1 keeps the loop advancing)
     for (int length = 1; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
     {
         Log.Info("******Number of records: " + length);
         CreateSequenceFile(length);
         int count = CountRecords(0);

         // Count the powers of ten <= length. For length >= 1 this equals
         // floor(log10(length)) + 1, computed with integers so the result does not
         // depend on floating-point rounding. The long counter cannot overflow
         // before it exceeds any int length.
         // NOTE(review): presumably record keys are the numbers 1..length, so the
         // pattern accepts exactly the keys 1, 10, 100, ... -- confirm against
         // CreateSequenceFile.
         int expectedCount = 0;
         for (long powerOfTen = 1; powerOfTen <= length; powerOfTen *= 10)
         {
             expectedCount++;
         }

         // NUnit's signature is AreEqual(expected, actual); keeping that order makes
         // failure messages report the right value as "expected".
         NUnit.Framework.Assert.AreEqual(expectedCount, count);
     }
     // clean up
     fs.Delete(inDir, true);
 }