public void TestCardinality1() { BitSet set = new BitSet(); Assert.AreEqual(0, set.Cardinality()); for (int i = 0; i < 128; i++) { set.Set(i); Assert.AreEqual(i + 1, set.Cardinality()); Assert.AreEqual(0, set.NextSetBit(0)); if (i > 0) { Assert.AreEqual(i * 1 - 1, set.NextSetBit(i * 1 - 1)); } } }
public void TestFilteredDocSetIterator() { var set1 = new IntArrayDocIdSet(); for (int i = 0; i < 100; i++) { set1.AddDoc(2 * i); // 100 even numbers } var filteredIter = new MyFilteredDocSetIterator(set1.Iterator()); var bs = new BitSet(200); for (int i = 0; i < 100; ++i) { int n = 10 * i; if (n < 200) { bs.Set(n); } } try { int doc; while ((doc = filteredIter.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (!bs.Get(doc)) { Assert.Fail("failed: " + doc + " not in expected set"); return; } else { bs.Clear(doc); } } if (bs.Cardinality() > 0) { Assert.Fail("failed: leftover cardinality: " + bs.Cardinality()); } } catch (Exception e) { Assert.Fail(e.Message); } }
protected virtual void Condense(float[] floats) { if (floats.Length != _capacity) { throw new ArgumentException("bad input float array of length " + floats.Length + " for capacity: " + _capacity); } var bits = new BitSet(floats.Length); int on = 0; for (int i = 0; i < floats.Length; i++) { if (floats[i] != 0f) { bits.Set(i); on++; } } if (((float)on) / ((float)floats.Length) < ON_RATIO_CUTOFF) { // it's worth compressing if (0 == on) { // it's worth super-compressing _floats = null; _bits = null; _referencePoints = null; // capacity is good. } else { _bits = bits; _floats = new float[_bits.Cardinality()]; _referencePoints = new int[floats.Length / REFERENCE_POINT_EVERY]; int i = 0; int floatsIdx = 0; int refIdx = 0; while (i < floats.Length && (i = _bits.NextSetBit(i)) >= 0) { _floats[floatsIdx] = floats[i]; while (refIdx < i / REFERENCE_POINT_EVERY) { _referencePoints[refIdx++] = floatsIdx; } floatsIdx++; i++; } while (refIdx < _referencePoints.Length) { _referencePoints[refIdx++] = floatsIdx; } } } else { // it's not worth compressing _floats = floats; _bits = null; } }
// Stores status of decommissioning. // If node is not decommissioning, do not use this object for anything. public virtual int UpdateBlockReportContext(BlockReportContext context) { if (curBlockReportId != context.GetReportId()) { curBlockReportId = context.GetReportId(); curBlockReportRpcsSeen = new BitSet(context.GetTotalRpcs()); } curBlockReportRpcsSeen.Set(context.GetCurRpc()); return(curBlockReportRpcsSeen.Cardinality()); }
/// <summary> /// Returns the unique alternative predicted by all alternative subsets in /// <code>altsets</code> /// . If no such alternative exists, this method returns /// <see cref="ATN.InvalidAltNumber">ATN.InvalidAltNumber</see> /// . /// </summary> /// <param name="altsets">a collection of alternative subsets</param> public static int GetUniqueAlt(IEnumerable <BitSet> altsets) { BitSet all = GetAlts(altsets); if (all.Cardinality() == 1) { return(all.NextSetBit(0)); } return(ATN.InvalidAltNumber); }
/// <summary> /// Returns the unique alternative predicted by all alternative subsets in /// <paramref name="altsets"/> /// . If no such alternative exists, this method returns /// <see cref="ATN.INVALID_ALT_NUMBER"/> /// . /// </summary> /// <param name="altsets">a collection of alternative subsets</param> public static int GetUniqueAlt(IEnumerable <BitSet> altsets) { BitSet all = GetAlts(altsets); if (all.Cardinality() == 1) { return(all.NextSetBit(0)); } return(ATN.INVALID_ALT_NUMBER); }
public BitSet GetAncestors(int rule) { BitSet ancestors = new BitSet(); ancestors.Or(parents[rule]); while (true) { int cardinality = ancestors.Cardinality(); for (int i = ancestors.NextSetBit(0); i >= 0; i = ancestors.NextSetBit(i + 1)) { ancestors.Or(parents[i]); } if (ancestors.Cardinality() == cardinality) { // nothing changed break; } } return(ancestors); }
public BitSet GetDescendants(int rule) { BitSet descendants = new BitSet(); descendants.Or(children[rule]); while (true) { int cardinality = descendants.Cardinality(); for (int i = descendants.NextSetBit(0); i >= 0; i = descendants.NextSetBit(i + 1)) { descendants.Or(children[i]); } if (descendants.Cardinality() == cardinality) { // nothing changed break; } } return(descendants); }
public static int GetSingleViableAlt(IEnumerable <BitSet> altsets) { BitSet viableAlts = new BitSet(); foreach (BitSet alts in altsets) { int minAlt = alts.NextSetBit(0); viableAlts.Set(minAlt); if (viableAlts.Cardinality() > 1) { // more than 1 viable alt return(ATN.INVALID_ALT_NUMBER); } } return(viableAlts.NextSetBit(0)); }
public static int GetSingleViableAlt([NotNull] IEnumerable <BitSet> altsets) { BitSet viableAlts = new BitSet(); foreach (BitSet alts in altsets) { int minAlt = alts.NextSetBit(0); viableAlts.Set(minAlt); if (viableAlts.Cardinality() > 1) { // more than 1 viable alt return(ATN.InvalidAltNumber); } } return(viableAlts.NextSetBit(0)); }
/// <exception cref="System.IO.IOException"/> public virtual void TestFormat() { Log.Info("Test started"); Log.Info("Max split count = " + MaxSplitCount); Log.Info("Split count increment = " + SplitCountIncr); Log.Info("Max bytes per file = " + MaxBytes); Log.Info("Max number of files = " + MaxNumFiles); Log.Info("Number of files increment = " + NumFilesIncr); MultiFileInputFormat <Text, Text> format = new TestMultiFileInputFormat.DummyMultiFileInputFormat (this); FileSystem fs = FileSystem.GetLocal(job); for (int numFiles = 1; numFiles < MaxNumFiles; numFiles += (NumFilesIncr / 2) + rand .Next(NumFilesIncr / 2)) { Path dir = InitFiles(fs, numFiles, -1); BitSet bits = new BitSet(numFiles); for (int i = 1; i < MaxSplitCount; i += rand.Next(SplitCountIncr) + 1) { Log.Info("Running for Num Files=" + numFiles + ", split count=" + i); MultiFileSplit[] splits = (MultiFileSplit[])format.GetSplits(job, i); bits.Clear(); foreach (MultiFileSplit split in splits) { long splitLength = 0; foreach (Path p in split.GetPaths()) { long length = fs.GetContentSummary(p).GetLength(); NUnit.Framework.Assert.AreEqual(length, lengths[p.GetName()]); splitLength += length; string name = p.GetName(); int index = System.Convert.ToInt32(Sharpen.Runtime.Substring(name, name.LastIndexOf ("file_") + 5)); NUnit.Framework.Assert.IsFalse(bits.Get(index)); bits.Set(index); } NUnit.Framework.Assert.AreEqual(splitLength, split.GetLength()); } } NUnit.Framework.Assert.AreEqual(bits.Cardinality(), numFiles); fs.Delete(dir, true); } Log.Info("Test Finished"); }
public void TestGetScriptExtensions() { BitSet scripts = new BitSet(UScript.CodeLimit); /* invalid code points */ if (UScript.GetScriptExtensions(-1, scripts) != UScript.Unknown || scripts.Cardinality() != 1 || !scripts.Get(UScript.Unknown)) { Errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}"); } if (UScript.GetScriptExtensions(0x110000, scripts) != UScript.Unknown || scripts.Cardinality() != 1 || !scripts.Get(UScript.Unknown)) { Errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}"); } /* normal usage */ if (UScript.GetScriptExtensions(0x063f, scripts) != UScript.Arabic || scripts.Cardinality() != 1 || !scripts.Get(UScript.Arabic)) { Errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}"); } if (UScript.GetScriptExtensions(0x0640, scripts) > -3 || scripts.Cardinality() < 3 || !scripts.Get(UScript.Arabic) || !scripts.Get(UScript.Syriac) || !scripts.Get(UScript.Mandaic) ) { Errln("UScript.getScriptExtensions(U+0640) failed"); } if (UScript.GetScriptExtensions(0xfdf2, scripts) != -2 || scripts.Cardinality() != 2 || !scripts.Get(UScript.Arabic) || !scripts.Get(UScript.Thaana)) { Errln("UScript.getScriptExtensions(U+FDF2) failed"); } if (UScript.GetScriptExtensions(0xff65, scripts) != -6 || scripts.Cardinality() != 6 || !scripts.Get(UScript.Bopomofo) || !scripts.Get(UScript.Yi)) { Errln("UScript.getScriptExtensions(U+FF65) failed"); } }
public virtual void TestSplitableCodecs() { Job job = Job.GetInstance(defaultConf); Configuration conf = job.GetConfiguration(); // Create the codec CompressionCodec codec = null; try { codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec" ), conf); } catch (TypeLoadException) { throw new IOException("Illegal codec!"); } Path file = new Path(workDir, "test" + codec.GetDefaultExtension()); int seed = new Random().Next(); Log.Info("seed = " + seed); Random random = new Random(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int MaxLength = 500000; FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20); // for a variety of lengths for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1) { Log.Info("creating; entries = " + length); // create a file with length entries TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create (file))); try { for (int i = 0; i < length; i++) { writer.Write(Sharpen.Extensions.ToString(i * 2)); writer.Write("\t"); writer.Write(Sharpen.Extensions.ToString(i)); writer.Write("\n"); } } finally { writer.Close(); } // try splitting the file in a variety of sizes KeyValueTextInputFormat format = new KeyValueTextInputFormat(); NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable", format.IsSplitable(job , file)); for (int i_1 = 0; i_1 < 3; i_1++) { int numSplits = random.Next(MaxLength / 2000) + 1; Log.Info("splitting: requesting = " + numSplits); IList <InputSplit> splits = format.GetSplits(job); Log.Info("splitting: got = " + splits.Count); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.Count; j++) { Log.Debug("split[" + j + "]= " + splits[j]); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <Text, Text> reader = format.CreateRecordReader(splits[j], context); Type clazz = reader.GetType(); MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text , Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), splits[j]); reader.Initialize(splits[j], mcontext); Text key = null; Text value = null; try { int count = 0; while (reader.NextKeyValue()) { key = reader.GetCurrentKey(); value = reader.GetCurrentValue(); int k = System.Convert.ToInt32(key.ToString()); int v = System.Convert.ToInt32(value.ToString()); NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2); NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v); Log.Debug("read " + k + "," + v); NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.", bits.Get (v)); bits.Set(v); count++; } if (count > 0) { Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count); } else { Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count); } } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }
/// <exception cref="System.Exception"/> public virtual void TestFormat() { JobConf job = new JobConf(conf); FileSystem fs = FileSystem.GetLocal(conf); Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Reporter reporter = Reporter.Null; int seed = new Random().Next(); //LOG.info("seed = "+seed); Random random = new Random(seed); fs.Delete(dir, true); FileInputFormat.SetInputPaths(job, dir); // for a variety of lengths for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1) { //LOG.info("creating; entries = " + length); // create a file with length entries SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(IntWritable ), typeof(LongWritable)); try { for (int i = 0; i < length; i++) { IntWritable key = new IntWritable(i); LongWritable value = new LongWritable(10 * i); writer.Append(key, value); } } finally { writer.Close(); } // try splitting the file in a variety of sizes InputFormat <Text, Text> format = new SequenceFileAsTextInputFormat(); for (int i_1 = 0; i_1 < 3; i_1++) { int numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1; //LOG.info("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(job, numSplits); //LOG.info("splitting: got = " + splits.length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.Length; j++) { RecordReader <Text, Text> reader = format.GetRecordReader(splits[j], job, reporter ); Type readerClass = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is SequenceFileAsTextRecordReader." , typeof(SequenceFileAsTextRecordReader), readerClass); Text value = reader.CreateValue(); Text key = reader.CreateKey(); try { int count = 0; while (reader.Next(key, value)) { // if (bits.get(key.get())) { // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get()); // LOG.info("@"+reader.getPos()); // } int keyInt = System.Convert.ToInt32(key.ToString()); NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(keyInt)); bits.Set(keyInt); count++; } } finally { //LOG.info("splits["+j+"]="+splits[j]+" count=" + count); reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }
// A reporter that does nothing /// <exception cref="System.Exception"/> public virtual void TestFormat() { JobConf job = new JobConf(defaultConf); Random random = new Random(); long seed = random.NextLong(); Log.Info("seed = " + seed); random.SetSeed(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int length = 10000; int numFiles = 10; CreateFiles(length, numFiles, random); // create a combined split for the files CombineTextInputFormat format = new CombineTextInputFormat(); LongWritable key = new LongWritable(); Text value = new Text(); for (int i = 0; i < 3; i++) { int numSplits = random.Next(length / 20) + 1; Log.Info("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(job, numSplits); Log.Info("splitting: got = " + splits.Length); // we should have a single split as the length is comfortably smaller than // the block size NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length); InputSplit split = splits[0]; NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit ), split.GetType()); // check the split BitSet bits = new BitSet(length); Log.Debug("split= " + split); RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, job, voidReporter ); try { int count = 0; while (reader.Next(key, value)) { int v = System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); if (bits.Get(v)) { Log.Warn("conflict with " + v + " at position " + reader.GetPos()); } NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Info("splits=" + split + " count=" + count); } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } }
/// <exception cref="System.Exception"/> public virtual void TestFormat() { JobConf job = new JobConf(conf); Reporter reporter = Reporter.Null; Random random = new Random(); long seed = random.NextLong(); Log.Info("seed = " + seed); random.SetSeed(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int length = 10000; int numFiles = 10; // create a file with various lengths CreateFiles(length, numFiles, random); // create a combine split for the files InputFormat <IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat <IntWritable, BytesWritable>(); IntWritable key = new IntWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < 3; i++) { int numSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1; Log.Info("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(job, numSplits); Log.Info("splitting: got = " + splits.Length); // we should have a single split as the length is comfortably smaller than // the block size NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length); InputSplit split = splits[0]; NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit ), split.GetType()); // check each split BitSet bits = new BitSet(length); RecordReader <IntWritable, BytesWritable> reader = format.GetRecordReader(split, job , reporter); try { while (reader.Next(key, value)) { NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(key.Get()) ); bits.Set(key.Get()); } } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } }
/// <exception cref="System.IO.IOException"/> public virtual void TestSplitableCodecs() { JobConf conf = new JobConf(defaultConf); int seed = new Random().Next(); // Create the codec CompressionCodec codec = null; try { codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec" ), conf); } catch (TypeLoadException) { throw new IOException("Illegal codec!"); } Path file = new Path(workDir, "test" + codec.GetDefaultExtension()); // A reporter that does nothing Reporter reporter = Reporter.Null; Log.Info("seed = " + seed); Random random = new Random(seed); FileSystem localFs = FileSystem.GetLocal(conf); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(conf, workDir); int MaxLength = 500000; // for a variety of lengths for (int length = MaxLength / 2; length < MaxLength; length += random.Next(MaxLength / 4) + 1) { Log.Info("creating; entries = " + length); // create a file with length entries TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create (file))); try { for (int i = 0; i < length; i++) { writer.Write(Sharpen.Extensions.ToString(i)); writer.Write("\n"); } } finally { writer.Close(); } // try splitting the file in a variety of sizes TextInputFormat format = new TextInputFormat(); format.Configure(conf); LongWritable key = new LongWritable(); Text value = new Text(); for (int i_1 = 0; i_1 < 3; i_1++) { int numSplits = random.Next(MaxLength / 2000) + 1; Log.Info("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(conf, numSplits); Log.Info("splitting: got = " + splits.Length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.Length; j++) { Log.Debug("split[" + j + "]= " + splits[j]); RecordReader <LongWritable, Text> reader = format.GetRecordReader(splits[j], conf, reporter); try { int counter = 0; while (reader.Next(key, value)) { int v = System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); if (bits.Get(v)) { Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos ()); } NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); counter++; } if (counter > 0) { Log.Info("splits[" + j + "]=" + splits[j] + " count=" + counter); } else { Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + counter); } } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }
/// <exception cref="System.Exception"/> public virtual void TestFormat() { JobConf job = new JobConf(); Path file = new Path(workDir, "test.txt"); // A reporter that does nothing Reporter reporter = Reporter.Null; int seed = new Random().Next(); Log.Info("seed = " + seed); Random random = new Random(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); // for a variety of lengths for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1) { Log.Debug("creating; entries = " + length); // create a file with length entries TextWriter writer = new OutputStreamWriter(localFs.Create(file)); try { for (int i = 0; i < length; i++) { writer.Write(Sharpen.Extensions.ToString(i * 2)); writer.Write("\t"); writer.Write(Sharpen.Extensions.ToString(i)); writer.Write("\n"); } } finally { writer.Close(); } // try splitting the file in a variety of sizes KeyValueTextInputFormat format = new KeyValueTextInputFormat(); format.Configure(job); for (int i_1 = 0; i_1 < 3; i_1++) { int numSplits = random.Next(MaxLength / 20) + 1; Log.Debug("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(job, numSplits); Log.Debug("splitting: got = " + splits.Length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.Length; j++) { Log.Debug("split[" + j + "]= " + splits[j]); RecordReader <Text, Text> reader = format.GetRecordReader(splits[j], job, reporter ); Type readerClass = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.", typeof( KeyValueLineRecordReader), readerClass); Text key = reader.CreateKey(); Type keyClass = key.GetType(); Text value = reader.CreateValue(); Type valueClass = value.GetType(); NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), keyClass); NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass); try { int count = 0; while (reader.Next(key, value)) { int v = System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); if (bits.Get(v)) { Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos ()); } NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count); } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }
public virtual void TestFormat() { Job job = Job.GetInstance(new Configuration(defaultConf)); Path file = new Path(workDir, "test.txt"); int seed = new Random().Next(); Log.Info("seed = " + seed); Random random = new Random(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int MaxLength = 10000; // for a variety of lengths for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1) { Log.Debug("creating; entries = " + length); // create a file with length entries TextWriter writer = new OutputStreamWriter(localFs.Create(file)); try { for (int i = 0; i < length; i++) { writer.Write(Sharpen.Extensions.ToString(i * 2)); writer.Write("\t"); writer.Write(Sharpen.Extensions.ToString(i)); writer.Write("\n"); } } finally { writer.Close(); } // try splitting the file in a variety of sizes KeyValueTextInputFormat format = new KeyValueTextInputFormat(); for (int i_1 = 0; i_1 < 3; i_1++) { int numSplits = random.Next(MaxLength / 20) + 1; Log.Debug("splitting: requesting = " + numSplits); IList <InputSplit> splits = format.GetSplits(job); Log.Debug("splitting: got = " + splits.Count); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.Count; j++) { Log.Debug("split[" + j + "]= " + splits[j]); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <Text, Text> reader = format.CreateRecordReader(splits[j], context); Type clazz = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.", typeof( KeyValueLineRecordReader), clazz); MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text , Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), splits[j]); reader.Initialize(splits[j], mcontext); Text key = null; Text value = null; try { int count = 0; while (reader.NextKeyValue()) { key = reader.GetCurrentKey(); clazz = key.GetType(); NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), clazz); value = reader.GetCurrentValue(); clazz = value.GetType(); NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), clazz); int k = System.Convert.ToInt32(key.ToString()); int v = System.Convert.ToInt32(value.ToString()); NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2); NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v); Log.Debug("read " + v); NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count); } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }
/// <exception cref="System.Exception"/> public virtual void TestFormat() { Job job = Job.GetInstance(new Configuration(defaultConf)); Random random = new Random(); long seed = random.NextLong(); Log.Info("seed = " + seed); random.SetSeed(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int length = 10000; int numFiles = 10; // create files with various lengths CreateFiles(length, numFiles, random); // create a combined split for the files CombineTextInputFormat format = new CombineTextInputFormat(); for (int i = 0; i < 3; i++) { int numSplits = random.Next(length / 20) + 1; Log.Info("splitting: requesting = " + numSplits); IList <InputSplit> splits = format.GetSplits(job); Log.Info("splitting: got = " + splits.Count); // we should have a single split as the length is comfortably smaller than // the block size NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Count); InputSplit split = splits[0]; NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit ), split.GetType()); // check the split BitSet bits = new BitSet(length); Log.Debug("split= " + split); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context ); NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.", typeof( CombineFileRecordReader), reader.GetType()); MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl < LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split); reader.Initialize(split, mcontext); try { int count = 0; while (reader.NextKeyValue()) { LongWritable key = reader.GetCurrentKey(); NUnit.Framework.Assert.IsNotNull("Key should not be null.", key); Text value = reader.GetCurrentValue(); int v = System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Debug("split=" + split + " count=" + count); } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } }
/// <exception cref="System.Exception"/> public virtual void TestFormat() { Job job = Job.GetInstance(conf); FileSystem fs = FileSystem.GetLocal(conf); Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); int seed = new Random().Next(); Random random = new Random(seed); fs.Delete(dir, true); FileInputFormat.SetInputPaths(job, dir); // for a variety of lengths for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1) { // create a file with length entries SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(IntWritable ), typeof(LongWritable)); try { for (int i = 0; i < length; i++) { IntWritable key = new IntWritable(i); LongWritable value = new LongWritable(10 * i); writer.Append(key, value); } } finally { writer.Close(); } TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); // try splitting the file in a variety of sizes InputFormat <Text, Text> format = new SequenceFileAsTextInputFormat(); for (int i_1 = 0; i_1 < 3; i_1++) { // check each split BitSet bits = new BitSet(length); int numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1; FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(file).GetLen() / numSplits ); foreach (InputSplit split in format.GetSplits(job)) { RecordReader <Text, Text> reader = format.CreateRecordReader(split, context); MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text , Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split); reader.Initialize(split, mcontext); Type readerClass = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is SequenceFileAsTextRecordReader." , typeof(SequenceFileAsTextRecordReader), readerClass); Text key; try { int count = 0; while (reader.NextKeyValue()) { key = reader.GetCurrentKey(); int keyInt = System.Convert.ToInt32(key.ToString()); NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(keyInt)); bits.Set(keyInt); count++; } } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }