Esempio n. 1
0
        /// <summary>
        /// Opens a record reader for <paramref name="split"/> via <paramref name="format"/>,
        /// reads records until the reader reports no more, and returns the record values
        /// in the order they were read. Keys are read but not returned.
        /// </summary>
        /// <param name="format">Input format used to obtain the record reader.</param>
        /// <param name="split">The split whose records are read.</param>
        /// <param name="job">Job configuration handed to the record reader.</param>
        /// <returns>A list holding one value object per record read.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <Text> ReadSplit(KeyValueTextInputFormat format, InputSplit split
                                              , JobConf job)
        {
            IList <Text> values = new AList <Text>();
            RecordReader <Text, Text> recordReader = null;

            try
            {
                recordReader = format.GetRecordReader(split, job, voidReporter);
                // A single key object is passed to every Next call; only values are collected.
                Text sharedKey = recordReader.CreateKey();
                // Every collected value gets its own instance: a new one is created after each
                // record so the next read does not receive the object already stored in the list.
                for (Text current = recordReader.CreateValue();
                     recordReader.Next(sharedKey, current);
                     current = recordReader.CreateValue())
                {
                    values.AddItem(current);
                }
            }
            finally
            {
                // The reader stays null if GetRecordReader itself threw.
                if (recordReader != null)
                {
                    recordReader.Close();
                }
            }

            return values;
        }
Esempio n. 2
0
        /// <summary>
        /// Writes test files with a varying number of tab-separated "key TAB value" lines,
        /// asks <c>KeyValueTextInputFormat</c> for splits using several randomly chosen split
        /// counts, and asserts that every written value is read back exactly once across
        /// all splits.
        /// </summary>
        /// <remarks>
        /// The random seed is logged before use, presumably so a failing run can be replayed.
        /// The record reader of each split is closed even when an assertion on the reader,
        /// key, or value type fails.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job  = new JobConf();
            Path    file = new Path(workDir, "test.txt");
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            int      seed     = new Random().Next();

            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) +
                                                               1)
            {
                Log.Debug("creating; entries = " + length);
                // create a file with length entries: line i holds key (i * 2) and value i
                TextWriter writer = new OutputStreamWriter(localFs.Create(file));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                format.Configure(job);
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    int numSplits = random.Next(MaxLength / 20) + 1;
                    Log.Debug("splitting: requesting = " + numSplits);
                    InputSplit[] splits = format.GetSplits(job, numSplits);
                    Log.Debug("splitting: got =        " + splits.Length);
                    // check each split; bit v is set once value v has been read from some split
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        RecordReader <Text, Text> reader = format.GetRecordReader(splits[j], job, reporter
                                                                                  );
                        // The try starts right after the reader is acquired so that a failing
                        // type assertion below cannot leave the reader open.
                        try
                        {
                            Type readerClass = reader.GetType();
                            NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.", typeof(
                                                                KeyValueLineRecordReader), readerClass);
                            Text key        = reader.CreateKey();
                            Type keyClass   = key.GetType();
                            Text value      = reader.CreateValue();
                            Type valueClass = value.GetType();
                            NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), keyClass);
                            NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass);
                            int count = 0;
                            while (reader.Next(key, value))
                            {
                                int v = System.Convert.ToInt32(value.ToString());
                                Log.Debug("read " + v);
                                // Log where the duplicate was seen before the assertion below fails.
                                if (bits.Get(v))
                                {
                                    Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos
                                                 ());
                                }
                                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                                bits.Set(v);
                                count++;
                            }
                            Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every one of the length written values must have been seen in some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                        ());
                }
            }
        }