/// <summary>
/// Writes every even key in [0, 2000) to a BloomMapFile, then probes every key in
/// that range with <c>ProbablyHasKey</c> and checks the number of false negatives
/// (must be 0) and false positives (must be below 2).
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestMembershipTest()
{
    int keyCount = 2000;
    FileSystem fs = FileSystem.GetLocal(conf);
    Path qualifiedDirName = fs.MakeQualified(TestDir);
    conf.SetInt("io.mapfile.bloom.size", 2048);
    BloomMapFile.Writer writer = null;
    BloomMapFile.Reader reader = null;
    try
    {
        // Write phase: only the even keys go into the file.
        writer = new BloomMapFile.Writer(conf, fs, qualifiedDirName.ToString(), typeof(IntWritable), typeof(Text));
        IntWritable key = new IntWritable();
        Text value = new Text();
        for (int even = 0; even < keyCount; even += 2)
        {
            key.Set(even);
            value.Set("00" + even);
            writer.Append(key, value);
        }
        writer.Close();

        // Probe phase: count mismatches between the filter's answer and what was written.
        reader = new BloomMapFile.Reader(fs, qualifiedDirName.ToString(), conf);
        int falsePos = 0;
        int falseNeg = 0;
        for (int probe = 0; probe < keyCount; probe++)
        {
            key.Set(probe);
            bool exists = reader.ProbablyHasKey(key);
            bool wasWritten = probe % 2 == 0;
            if (wasWritten && !exists)
            {
                falseNeg++;
            }
            else if (!wasWritten && exists)
            {
                falsePos++;
            }
        }
        reader.Close();

        // The directory is removed before asserting, so a failed assertion does not leave it behind.
        fs.Delete(qualifiedDirName, true);
        System.Console.Out.WriteLine("False negatives: " + falseNeg);
        Assert.Equal(0, falseNeg);
        System.Console.Out.WriteLine("False positives: " + falsePos);
        Assert.True(falsePos < 2);
    }
    finally
    {
        IOUtils.Cleanup(null, writer, reader);
    }
}
/// <summary>
/// Sorts the range [<paramref name="low"/>, <paramref name="high"/>) of
/// <paramref name="dest"/> using a merge sort that alternates between the two arrays.
/// Elements are compared by loading them into the <c>I</c> and <c>J</c> holders and
/// calling <c>comparator.Compare(I, J)</c>.
/// </summary>
/// <param name="src">
/// Auxiliary array. NOTE(review): the recursion swaps the roles of the two arrays, so
/// this presumably must hold the same values as <paramref name="dest"/> in the sorted
/// range on entry - confirm against callers.
/// </param>
/// <param name="dest">Array that receives the sorted range.</param>
/// <param name="low">Inclusive start index of the range.</param>
/// <param name="high">Exclusive end index of the range.</param>
public virtual void MergeSort(int[] src, int[] dest, int low, int high)
{
    int length = high - low;

    // Insertion sort on smallest arrays
    if (length < 7)
    {
        for (int i = low; i < high; i++)
        {
            for (int j = i; j > low; j--)
            {
                I.Set(dest[j - 1]);
                J.Set(dest[j]);
                if (comparator.Compare(I, J) > 0)
                {
                    Swap(dest, j, j - 1);
                }
            }
        }
        return;
    }

    // Recursively sort halves of dest into src.
    // The unsigned shift keeps the midpoint correct even if low + high wraps around.
    int mid = (int)(((uint)(low + high)) >> 1);
    MergeSort(dest, src, low, mid);
    MergeSort(dest, src, mid, high);

    I.Set(src[mid - 1]);
    J.Set(src[mid]);

    // If list is already sorted, just copy from src to dest. This is an
    // optimization that results in faster sorts for nearly ordered lists.
    if (comparator.Compare(I, J) <= 0)
    {
        System.Array.Copy(src, low, dest, low, length);
        return;
    }

    // Merge sorted halves (now in src) into dest.
    // p walks the left half [low, mid) and q walks the right half [mid, high); both
    // cursors are declared here because the merge below reads and advances them.
    for (int i = low, p = low, q = mid; i < high; i++)
    {
        // Only reload the comparison holders while both halves still have elements.
        if (q < high && p < mid)
        {
            I.Set(src[p]);
            J.Set(src[q]);
        }

        // Take from the left half when the right half is exhausted, or when the left
        // element sorts first (ties go left).
        if (q >= high || p < mid && comparator.Compare(I, J) <= 0)
        {
            dest[i] = src[p++];
        }
        else
        {
            dest[i] = src[q++];
        }
    }
}
/// <summary>
/// Writes <paramref name="value"/> to <paramref name="out"/> by loading it into the
/// <c>UInt</c> holder obtained from <c>TlData.Get()</c> and calling its <c>Write</c>.
/// </summary>
/// <param name="value">The int to write.</param>
/// <param name="out">The stream that receives the serialized value.</param>
/// <exception cref="System.IO.IOException"/>
internal static void WriteInt(int value, DataOutputStream @out)
{
    IntWritable holder = TlData.Get().UInt;
    holder.Set(value);
    holder.Write(@out);
}
/// <summary>
/// Extracts the text enclosed between <paramref name="open"/> and
/// <paramref name="close"/>, searching from the position held in
/// <paramref name="index"/> and honouring <c>StringUtils.EscapeChar</c>.
/// </summary>
/// <param name="str">The string to search.</param>
/// <param name="open">The opening delimiter.</param>
/// <param name="close">The closing delimiter.</param>
/// <param name="index">
/// Holds the start position on entry; on success it is advanced to the position just
/// past the closing delimiter. It is left unchanged when no block is found.
/// </param>
/// <returns>
/// The buffered block contents, or <c>null</c> when no opening delimiter is found.
/// </returns>
/// <exception cref="Sharpen.ParseException">
/// An opening delimiter was found but no closing delimiter follows it.
/// </exception>
private static string GetBlock(string str, char open, char close, IntWritable index)
{
    StringBuilder buffer = new StringBuilder();

    int openPos = StringUtils.FindNext(str, open, StringUtils.EscapeChar, index.Get(), buffer);
    // Whatever was buffered while locating the opening delimiter is not part of the block.
    buffer.Length = 0;
    if (openPos < 0)
    {
        return null;
    }

    // Resume the search just past the opening delimiter.
    int closePos = StringUtils.FindNext(str, close, StringUtils.EscapeChar, openPos + 1, buffer);
    if (closePos < 0)
    {
        throw new ParseException("Unexpected end of block", closePos);
    }

    // Found a block: step over the closing delimiter and hand back the buffered text.
    index.Set(closePos + 1);
    return buffer.ToString();
}
/// <summary>
/// Runs a fixed, scripted exchange over <c>dataOut</c> / <c>dataInput</c>: opens the
/// connection with <c>InitSoket</c>, writes a sequence of VInt message codes with
/// their payloads, reads one map item back and prints it, writes the closing code and
/// finally calls <c>CloseSoket</c>. Any exception is printed rather than propagated.
/// </summary>
public virtual void BinaryProtocolStub()
{
    try
    {
        InitSoket();

        // output code
        WritableUtils.WriteVInt(dataOut, 50);
        IntWritable outputKey = new IntWritable(123);
        WriteObject(outputKey, dataOut);
        WriteObject(new Text("value"), dataOut);

        // PARTITIONED_OUTPUT
        WritableUtils.WriteVInt(dataOut, 51);
        WritableUtils.WriteVInt(dataOut, 0);
        WriteObject(outputKey, dataOut);
        WriteObject(new Text("value"), dataOut);

        // STATUS
        WritableUtils.WriteVInt(dataOut, 52);
        Text.WriteString(dataOut, "PROGRESS");
        dataOut.Flush();

        // progress
        WritableUtils.WriteVInt(dataOut, 53);
        dataOut.WriteFloat(0.55f);

        // register counter (the VInt after the code is the counter id)
        WritableUtils.WriteVInt(dataOut, 55);
        WritableUtils.WriteVInt(dataOut, 0);
        Text.WriteString(dataOut, "group");
        Text.WriteString(dataOut, "name");

        // increment counter
        WritableUtils.WriteVInt(dataOut, 56);
        WritableUtils.WriteVInt(dataOut, 0);
        WritableUtils.WriteVLong(dataOut, 2);

        // map item: a VInt followed by a key and a value, each echoed to the console
        int intValue = WritableUtils.ReadVInt(dataInput);
        System.Console.Out.WriteLine("intValue:" + intValue);
        IntWritable receivedKey = new IntWritable();
        ReadObject(receivedKey, dataInput);
        System.Console.Out.WriteLine("key:" + receivedKey.Get());
        Text receivedValue = new Text();
        ReadObject(receivedValue, dataInput);
        System.Console.Out.WriteLine("value:" + receivedValue.ToString());

        // done: end of session
        WritableUtils.WriteVInt(dataOut, 54);
        System.Console.Out.WriteLine("finish");
        dataOut.Flush();
        dataOut.Close();
    }
    catch (Exception failure)
    {
        Sharpen.Runtime.PrintStackTrace(failure);
    }
    finally
    {
        CloseSoket();
    }
}
/// <summary>
/// Writes five map files with overlapping int keys, merges them with
/// <c>MapFile.Merger</c>, then reads the merged file several times and checks that
/// keys come back in non-decreasing order and match the sorted list of written keys.
/// Finally verifies that the input files no longer exist.
/// </summary>
public virtual void TestMerge()
{
    string testMethodKey = "testMerge.mapfile";
    int recordsPerFile = 10;
    int passes = 5;
    Path[] @in = new Path[5];
    IList<int> expected = new AList<int>();

    // Input file j holds the keys j .. j + recordsPerFile - 1.
    for (int fileNo = 0; fileNo < 5; fileNo++)
    {
        using (MapFile.Writer writer = CreateWriter(testMethodKey + "." + fileNo, typeof(IntWritable), typeof(Text)))
        {
            @in[fileNo] = new Path(TestDir, testMethodKey + "." + fileNo);
            for (int offset = 0; offset < recordsPerFile; offset++)
            {
                int keyValue = offset + fileNo;
                expected.AddItem(keyValue);
                writer.Append(new IntWritable(keyValue), new Text("Value:" + keyValue));
            }
        }
    }

    // Sort expected values
    expected.Sort();

    // Merge all 5 files; the second argument is expected to make the merger remove
    // its inputs, which the final loop of this test checks.
    MapFile.Merger merger = new MapFile.Merger(conf);
    merger.Merge(@in, true, new Path(TestDir, testMethodKey));

    using (MapFile.Reader reader = CreateReader(testMethodKey, typeof(IntWritable)))
    {
        int start = 0;
        Text startValue = new Text("Value:" + start);

        // Read the merged file from the beginning on every pass.
        for (int pass = 0; pass < passes; pass++)
        {
            IEnumerator<int> expectedIterator = expected.GetEnumerator();
            IntWritable key = new IntWritable(start);
            Text value = startValue;
            IntWritable prev = new IntWritable(start);
            while (reader.Next(key, value))
            {
                Assert.True("Next key should be always equal or more", prev.Get() <= key.Get());
                Assert.Equal(expectedIterator.Next(), key.Get());
                prev.Set(key.Get());
            }
            reader.Reset();
        }
    }

    // inputs should be deleted
    foreach (Path path in @in)
    {
        NUnit.Framework.Assert.IsFalse("inputs should be deleted", path.GetFileSystem(conf).Exists(path));
    }
}
/// <summary>
/// Adds up the ints carried by <paramref name="values"/>, stores the total in the
/// <c>result</c> holder and writes it to <paramref name="context"/> under
/// <paramref name="key"/>.
/// </summary>
/// <param name="key">The key the values belong to; written through unchanged.</param>
/// <param name="values">The values to total.</param>
/// <param name="context">The context that receives the (key, total) pair.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
protected internal override void Reduce(Key key, IEnumerable<IntWritable> values, Reducer.Context context)
{
    int total = 0;
    foreach (IntWritable item in values)
    {
        total = total + item.Get();
    }

    result.Set(total);
    context.Write(key, result);
}
/// <summary>
/// Appends <paramref name="numRecords"/> records to <paramref name="writer"/> and
/// then closes it. Record <c>n</c> has the int key <c>n</c> and a text value filled
/// in by <c>RandomText(val, n, Recordsize)</c>.
/// </summary>
/// <param name="writer">
/// The writer to fill. This method always closes it, including when writing fails.
/// </param>
/// <param name="numRecords">Number of records to append; no records are written when it is 0 or negative.</param>
/// <exception cref="System.IO.IOException"/>
public static void WriteSequenceFile(SequenceFile.Writer writer, int numRecords)
{
    IntWritable key = new IntWritable();
    Text val = new Text();
    try
    {
        for (int numWritten = 0; numWritten < numRecords; ++numWritten)
        {
            key.Set(numWritten);
            RandomText(val, numWritten, Recordsize);
            writer.Append(key, val);
        }
    }
    finally
    {
        // Close on every path so a failure while generating or appending a record
        // does not leave the writer open.
        writer.Close();
    }
}
/// <summary>
/// Records a failed copy of <paramref name="mapId"/> from <paramref name="host"/>:
/// penalizes the host, bumps the per-map failure count, reports an exception once the
/// count reaches <c>abortFailureLimit</c>, informs the app master, checks reducer
/// health and queues a penalty whose delay grows with the failure count (capped at
/// <c>maxDelay</c>).
/// </summary>
/// <param name="mapId">The map attempt whose output could not be copied.</param>
/// <param name="host">The host the copy was attempted from.</param>
/// <param name="readError">Passed through to <c>CheckAndInformMRAppMaster</c>.</param>
/// <param name="connectExcpt">Passed through to <c>CheckAndInformMRAppMaster</c>.</param>
public virtual void CopyFailed(TaskAttemptID mapId, MapHost host, bool readError, bool connectExcpt)
{
    lock (this)
    {
        host.Penalize();

        // Per-map failure count: the first failure creates the counter at 1.
        int failures;
        if (!failureCounts.Contains(mapId))
        {
            failureCounts[mapId] = new IntWritable(1);
            failures = 1;
        }
        else
        {
            IntWritable mapCounter = failureCounts[mapId];
            mapCounter.Set(mapCounter.Get() + 1);
            failures = mapCounter.Get();
        }

        string hostname = host.GetHostName();
        IntWritable hostFailedNum = hostFailures[hostname];
        // MAPREDUCE-6361: hostname could get cleanup from hostFailures in another
        // thread with copySucceeded.
        // In this case, add back hostname to hostFailures to get rid of NPE issue.
        if (hostFailedNum == null)
        {
            hostFailures[hostname] = new IntWritable(1);
        }

        // report failure if already retried maxHostFailures times
        bool hostFail = hostFailures[hostname].Get() > GetMaxHostFailures();

        if (failures >= abortFailureLimit)
        {
            // The exception is thrown and caught on purpose so the reporter receives
            // an exception that has actually been raised.
            try
            {
                throw new IOException(failures + " failures downloading " + mapId);
            }
            catch (IOException raised)
            {
                reporter.ReportException(raised);
            }
        }

        CheckAndInformMRAppMaster(failures, mapId, readError, connectExcpt, hostFail);
        CheckReducerHealth();

        // The penalty grows geometrically with the failure count, up to maxDelay.
        long delay = (long)(InitialPenalty * Math.Pow(PenaltyGrowthRate, failures));
        delay = Math.Min(delay, maxDelay);

        penalties.AddItem(new ShuffleSchedulerImpl.Penalty(host, delay));
        failedShuffleCounter.Increment(1);
    }
}
/// <summary>
/// Increments the failure count kept for <paramref name="hostname"/> in
/// <c>hostFailures</c>, creating the entry with a count of 1 when none exists.
/// </summary>
/// <param name="hostname">The host that failed.</param>
public virtual void HostFailed(string hostname)
{
    lock (this)
    {
        if (!hostFailures.Contains(hostname))
        {
            // First failure seen for this host.
            hostFailures[hostname] = new IntWritable(1);
            return;
        }

        IntWritable counter = hostFailures[hostname];
        counter.Set(counter.Get() + 1);
    }
}
/// <summary>
/// Checks one joined tuple against the expected key/value pattern, emits
/// (<paramref name="key"/>, <c>one</c>), and then deliberately overwrites the key and
/// the first tuple value.
/// </summary>
/// <param name="key">The join key; must be a multiple of <c>srcs * srcs</c>.</param>
/// <param name="val">The joined tuple; element <c>i</c> must satisfy <c>(value - i) * srcs == 10 * key</c>.</param>
/// <param name="context">The context that receives the output pair.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
protected override void Map(IntWritable key, TupleWritable val, Mapper.Context context)
{
    int keyValue = key.Get();
    string failureMessage = "Unexpected tuple: " + Stringify(key, val);

    NUnit.Framework.Assert.IsTrue(failureMessage, 0 == keyValue % (srcs * srcs));
    for (int pos = 0; pos < val.Size(); ++pos)
    {
        IntWritable element = (IntWritable)val.Get(pos);
        int elementValue = element.Get();
        NUnit.Framework.Assert.IsTrue(failureMessage, (elementValue - pos) * srcs == 10 * keyValue);
    }

    context.Write(key, one);

    // If the user modifies the key or any of the values in the tuple, it
    // should not affect the rest of the join.
    key.Set(-1);
    if (val.Has(0))
    {
        IntWritable first = (IntWritable)val.Get(0);
        first.Set(0);
    }
}
/// <summary>
/// Creates <paramref name="srcs"/> sequence files through <c>CreateWriters</c> and
/// fills each with <c>srcs * 2 + 1</c> rounds of (key, value) int pairs. In round
/// <c>k</c>, writer <c>i</c> gets the key <c>k * srcs</c> when <c>k</c> is a multiple
/// of <paramref name="srcs"/> and <c>k * srcs + i</c> otherwise, with the value
/// <c>10 * k + i</c>; the record is written twice when <c>i == k</c>.
/// </summary>
/// <param name="testdir">Directory handed to <c>CreateWriters</c>.</param>
/// <param name="conf">Configuration handed to <c>CreateWriters</c>.</param>
/// <param name="srcs">Number of source files to write.</param>
/// <returns>The path array that was passed to <c>CreateWriters</c>.</returns>
/// <exception cref="System.IO.IOException"/>
private static Path[] WriteSimpleSrc(Path testdir, Configuration conf, int srcs)
{
    SequenceFile.Writer[] @out = null;
    Path[] src = new Path[srcs];
    try
    {
        @out = CreateWriters(testdir, conf, srcs, src);
        int rounds = srcs * 2 + 1;
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        for (int round = 0; round < rounds; ++round)
        {
            for (int writerNo = 0; writerNo < srcs; ++writerNo)
            {
                int keyValue = round * srcs;
                if (round % srcs != 0)
                {
                    keyValue += writerNo;
                }
                key.Set(keyValue);
                val.Set(10 * round + writerNo);

                SequenceFile.Writer target = @out[writerNo];
                target.Append(key, val);
                if (writerNo == round)
                {
                    // add duplicate key
                    target.Append(key, val);
                }
            }
        }
    }
    finally
    {
        // Close whichever writers were actually created.
        if (@out != null)
        {
            for (int writerNo = 0; writerNo < srcs; ++writerNo)
            {
                SequenceFile.Writer open = @out[writerNo];
                if (open != null)
                {
                    open.Close();
                }
            }
        }
    }
    return src;
}
/// <summary>
/// Fills the <c>source</c> table with <c>source[i][j] = (i + 2) * (j + 1)</c> and
/// writes one sequence file per source whose keys are that source's row. Every record
/// of source <c>i</c> carries the value <c>i</c>, stored as an <c>IntWritable</c>
/// except for the last source, which uses a <c>LongWritable</c>.
/// </summary>
/// <param name="conf">Configuration handed to <c>CreateWriters</c>.</param>
/// <returns>The path array that was passed to <c>CreateWriters</c>.</returns>
/// <exception cref="System.IO.IOException"/>
private static Path[] GenerateSources(Configuration conf)
{
    // Build the key table first.
    for (int row = 0; row < Sources; ++row)
    {
        int[] keys = new int[Items];
        source[row] = keys;
        for (int col = 0; col < Items; ++col)
        {
            keys[col] = (row + 2) * (col + 1);
        }
    }

    Path[] src = new Path[Sources];
    SequenceFile.Writer[] @out = CreateWriters(@base, conf, Sources, src);
    IntWritable k = new IntWritable();
    for (int row = 0; row < Sources; ++row)
    {
        // The last source deliberately uses a different value type from the others.
        Writable v;
        if (row == Sources - 1)
        {
            LongWritable longValue = new LongWritable();
            longValue.Set(row);
            v = longValue;
        }
        else
        {
            IntWritable intValue = new IntWritable();
            intValue.Set(row);
            v = intValue;
        }

        SequenceFile.Writer writer = @out[row];
        for (int col = 0; col < Items; ++col)
        {
            k.Set(source[row][col]);
            writer.Append(k, v);
        }
        writer.Close();
    }
    return src;
}
/// <summary>
/// Runs a map-only job over the join expression
/// <c>outer(inner(S1,...,Sn),outer(fake,S1,...,Sn,fake))</c> and checks every output
/// tuple: the two fake inputs never contribute, each real source contributes exactly
/// when the key is one of its keys, and the inner join is present only for keys found
/// in all sources.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNestedJoin()
{
    // outer(inner(S1,...,Sn),outer(S1,...Sn))
    int sourceCount = 3;
    int itemCount = (sourceCount + 1) * (sourceCount + 1);
    JobConf job = new JobConf();
    Path baseDir = cluster.GetFileSystem().MakeQualified(new Path("/nested"));

    // Source s holds the keys (s + 2) * 1 .. (s + 2) * itemCount.
    int[][] sourceKeys = new int[sourceCount][];
    for (int s = 0; s < sourceCount; ++s)
    {
        sourceKeys[s] = new int[itemCount];
        for (int n = 0; n < itemCount; ++n)
        {
            sourceKeys[s][n] = (s + 2) * (n + 1);
        }
    }

    // Write one sequence file per source; every record of source s has the value s.
    Path[] src = new Path[sourceCount];
    SequenceFile.Writer[] @out = CreateWriters(baseDir, job, sourceCount, src);
    IntWritable k = new IntWritable();
    for (int s = 0; s < sourceCount; ++s)
    {
        IntWritable sourceId = new IntWritable();
        sourceId.Set(s);
        for (int n = 0; n < itemCount; ++n)
        {
            k.Set(sourceKeys[s][n]);
            @out[s].Append(k, sourceId);
        }
        @out[s].Close();
    }
    @out = null;

    // Build the join expression.
    StringBuilder expr = new StringBuilder();
    expr.Append("outer(inner(");
    for (int s = 0; s < sourceCount; ++s)
    {
        expr.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[s].ToString()));
        if (s + 1 != sourceCount)
        {
            expr.Append(",");
        }
    }
    expr.Append("),outer(");
    expr.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "foobar"));
    expr.Append(",");
    for (int s = 0; s < sourceCount; ++s)
    {
        expr.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[s].ToString()));
        expr.Append(",");
    }
    expr.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "raboof") + "))");

    // Configure and run the job.
    job.Set("mapreduce.join.expr", expr.ToString());
    job.SetInputFormat(typeof(CompositeInputFormat));
    Path outf = new Path(baseDir, "out");
    FileOutputFormat.SetOutputPath(job, outf);
    TestDatamerge.Fake_IF.SetKeyClass(job, typeof(IntWritable));
    TestDatamerge.Fake_IF.SetValClass(job, typeof(IntWritable));
    job.SetMapperClass(typeof(IdentityMapper));
    job.SetReducerClass(typeof(IdentityReducer));
    job.SetNumReduceTasks(0);
    job.SetOutputKeyClass(typeof(IntWritable));
    job.SetOutputValueClass(typeof(TupleWritable));
    job.SetOutputFormat(typeof(SequenceFileOutputFormat));
    JobClient.RunJob(job);

    // Exactly one non-empty output file is expected.
    FileStatus[] outlist = cluster.GetFileSystem().ListStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter());
    NUnit.Framework.Assert.AreEqual(1, outlist.Length);
    NUnit.Framework.Assert.IsTrue(0 < outlist[0].GetLen());

    SequenceFile.Reader reader = new SequenceFile.Reader(cluster.GetFileSystem(), outlist[0].GetPath(), job);
    TupleWritable tuple = new TupleWritable();
    while (reader.Next(k, tuple))
    {
        // Slot 1 is the outer join; its first and last positions are the fake inputs.
        TupleWritable outer = (TupleWritable)tuple.Get(1);
        NUnit.Framework.Assert.IsFalse(outer.Has(0));
        NUnit.Framework.Assert.IsFalse(outer.Has(sourceCount + 1));

        bool inAllSources = true;
        int keyValue = k.Get();
        for (int factor = 2; factor < sourceCount + 2; ++factor)
        {
            bool inThisSource = (keyValue % factor) == 0 && keyValue <= factor * itemCount;
            if (inThisSource)
            {
                IntWritable contributed = (IntWritable)outer.Get(factor - 1);
                NUnit.Framework.Assert.AreEqual(factor - 2, contributed.Get());
            }
            else
            {
                inAllSources = false;
            }
        }

        if (!inAllSources)
        {
            // should not be present in inner join
            NUnit.Framework.Assert.IsFalse(tuple.Has(0));
        }
        else
        {
            // present in all sources; chk inner
            NUnit.Framework.Assert.IsTrue(tuple.Has(0));
            TupleWritable inner = (TupleWritable)tuple.Get(0);
            for (int s = 0; s < sourceCount; ++s)
            {
                NUnit.Framework.Assert.IsTrue(inner.Has(s));
            }
        }
    }
    reader.Close();
    baseDir.GetFileSystem(job).Delete(baseDir, true);
}