/// <summary>Populate Tuple from iterators.</summary>
/// <remarks>
/// Populate Tuple from iterators.
/// It should be the case that, given iterators i_1...i_n over values from
/// sources s_1...s_n sharing key k, repeated calls to next should yield
/// I x I.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
protected internal virtual bool Next(TupleWritable val)
{
    // No static typeinfo on Tuples
    if (this.first)
    {
        // First call for this key: pull the first value from every child
        // iterator that has one, marking each filled slot as written.
        // NOTE(review): `i` ends up as the index of the LAST source that
        // produced a value; `pos` is then parked there for later calls.
        int i = -1;
        for (this.pos = 0; this.pos < this.iters.Length; ++this.pos)
        {
            if (this.iters[this.pos].HasNext() && this.iters[this.pos].Next((X)val.Get(this.pos
                )))
            {
                i = this.pos;
                val.SetWritten(i);
            }
        }
        this.pos = i;
        this.first = false;
        if (this.pos < 0)
        {
            // No source yielded anything for this key; discard state.
            this.Clear();
            return(false);
        }
        return(true);
    }
    // Subsequent calls: advance the rightmost iterator that still has a
    // value, scanning left past exhausted sources (odometer-style walk
    // over the cross product of per-source values).
    while (0 <= this.pos && !(this.iters[this.pos].HasNext() && this.iters[this.pos].
        Next((X)val.Get(this.pos))))
    {
        --this.pos;
    }
    if (this.pos < 0)
    {
        // Every iterator is exhausted; the cross product is complete.
        this.Clear();
        return(false);
    }
    val.SetWritten(this.pos);
    // Sources to the LEFT of pos re-emit the value they produced last time
    // (replay), so the tuple stays consistent while pos advances.
    for (int i_1 = 0; i_1 < this.pos; ++i_1)
    {
        if (this.iters[i_1].Replay((X)val.Get(i_1)))
        {
            val.SetWritten(i_1);
        }
    }
    // Sources to the RIGHT of pos restart from their first value (reset),
    // like low-order digits rolling over after a carry.
    while (this.pos + 1 < this.iters.Length)
    {
        ++this.pos;
        this.iters[this.pos].Reset();
        if (this.iters[this.pos].HasNext() && this.iters[this.pos].Next((X)val.Get(this.pos
            )))
        {
            val.SetWritten(this.pos);
        }
    }
    return(true);
}
/// <summary>
/// Reports whether this tuple and <paramref name="that"/> agree: same size,
/// the same positions written, and equal values at every written position.
/// </summary>
public virtual bool IsCompatible(TupleWritable that)
{
    if (this.Size() != that.Size())
    {
        return false;
    }
    for (int idx = 0; idx < values.Length; ++idx)
    {
        bool written = Has(idx);
        // Both tuples must agree on whether the slot is occupied...
        if (written != that.Has(idx))
        {
            return false;
        }
        // ...and, when occupied, on the value stored there.
        if (written && !values[idx].Equals(that.Get(idx)))
        {
            return false;
        }
    }
    return true;
}
/// <summary>Replay the last Tuple emitted.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual bool Replay(TupleWritable val)
{
    // No static typeinfo on Tuples
    // The last emitted tuple might have drawn on an empty source;
    // it can't be cleared prematurely, b/c there may be more duplicate
    // keys in iterator positions < pos
    System.Diagnostics.Debug.Assert(!this.first);
    bool replayedAny = false;
    for (int src = 0; src < this.iters.Length; ++src)
    {
        // Ask each child to re-emit its previous value into the tuple slot.
        if (this.iters[src].Replay((X)val.Get(src)))
        {
            val.SetWritten(src);
            replayedAny = true;
        }
    }
    return replayedAny;
}
/// <summary>
/// End-to-end test of a nested join expression:
/// outer(inner(S1,...,Sn), outer(Fake, S1,...,Sn, Fake)).
/// Writes Sources sequence files whose keys are multiples of (i+2), runs an
/// identity map-only job over the composite input, then checks which sources
/// contributed to each emitted tuple.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNestedJoin()
{
    // outer(inner(S1,...,Sn),outer(S1,...Sn))
    int Sources = 3;
    int Items = (Sources + 1) * (Sources + 1);
    JobConf job = new JobConf();
    Path @base = cluster.GetFileSystem().MakeQualified(new Path("/nested"));
    // source[i][j] = (i+2)*(j+1): source i holds the first Items multiples
    // of (i+2), so membership of a key in source i is (key % (i+2) == 0).
    int[][] source = new int[Sources][];
    for (int i = 0; i < Sources; ++i)
    {
        source[i] = new int[Items];
        for (int j = 0; j < Items; ++j)
        {
            source[i][j] = (i + 2) * (j + 1);
        }
    }
    // Write each source to its own SequenceFile; value = source index.
    Path[] src = new Path[Sources];
    SequenceFile.Writer[] @out = CreateWriters(@base, job, Sources, src);
    IntWritable k = new IntWritable();
    for (int i_1 = 0; i_1 < Sources; ++i_1)
    {
        IntWritable v = new IntWritable();
        v.Set(i_1);
        for (int j = 0; j < Items; ++j)
        {
            k.Set(source[i_1][j]);
            @out[i_1].Append(k, v);
        }
        @out[i_1].Close();
    }
    @out = null;
    // Build the join expression: an inner join of all real sources nested
    // inside an outer join that also includes two Fake_IF sources
    // ("foobar"/"raboof") that never produce records.
    StringBuilder sb = new StringBuilder();
    sb.Append("outer(inner(");
    for (int i_2 = 0; i_2 < Sources; ++i_2)
    {
        sb.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[i_2].
            ToString()));
        if (i_2 + 1 != Sources)
        {
            sb.Append(",");
        }
    }
    sb.Append("),outer(");
    sb.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "foobar"));
    sb.Append(",");
    for (int i_3 = 0; i_3 < Sources; ++i_3)
    {
        sb.Append(CompositeInputFormat.Compose(typeof(SequenceFileInputFormat), src[i_3].
            ToString()));
        sb.Append(",");
    }
    sb.Append(CompositeInputFormat.Compose(typeof(TestDatamerge.Fake_IF), "raboof") +
        "))");
    // Map-only identity job (0 reducers) so the output is the raw join tuples.
    job.Set("mapreduce.join.expr", sb.ToString());
    job.SetInputFormat(typeof(CompositeInputFormat));
    Path outf = new Path(@base, "out");
    FileOutputFormat.SetOutputPath(job, outf);
    TestDatamerge.Fake_IF.SetKeyClass(job, typeof(IntWritable));
    TestDatamerge.Fake_IF.SetValClass(job, typeof(IntWritable));
    job.SetMapperClass(typeof(IdentityMapper));
    job.SetReducerClass(typeof(IdentityReducer));
    job.SetNumReduceTasks(0);
    job.SetOutputKeyClass(typeof(IntWritable));
    job.SetOutputValueClass(typeof(TupleWritable));
    job.SetOutputFormat(typeof(SequenceFileOutputFormat));
    JobClient.RunJob(job);
    // Exactly one non-empty output file is expected from the single split.
    FileStatus[] outlist = cluster.GetFileSystem().ListStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter
        ());
    NUnit.Framework.Assert.AreEqual(1, outlist.Length);
    NUnit.Framework.Assert.IsTrue(0 < outlist[0].GetLen());
    SequenceFile.Reader r = new SequenceFile.Reader(cluster.GetFileSystem(), outlist[
        0].GetPath(), job);
    TupleWritable v_1 = new TupleWritable();
    while (r.Next(k, v_1))
    {
        // Tuple layout: v_1.Get(0) = inner-join tuple, v_1.Get(1) = outer-join
        // tuple. In the outer tuple, slots 0 and Sources+1 belong to the Fake_IF
        // sources and must never be written.
        NUnit.Framework.Assert.IsFalse(((TupleWritable)v_1.Get(1)).Has(0));
        NUnit.Framework.Assert.IsFalse(((TupleWritable)v_1.Get(1)).Has(Sources + 1));
        bool chk = true;
        int ki = k.Get();
        for (int i_4 = 2; i_4 < Sources + 2; ++i_4)
        {
            // Key ki came from source (i_4 - 2) iff it is a multiple of i_4
            // within that source's range; its value slot in the outer tuple is
            // offset by 1 for the leading "foobar" fake source.
            if ((ki % i_4) == 0 && ki <= i_4 * Items)
            {
                NUnit.Framework.Assert.AreEqual(i_4 - 2, ((IntWritable)((TupleWritable)v_1.Get(1)
                    ).Get((i_4 - 1))).Get());
            }
            else
            {
                chk = false;
            }
        }
        if (chk)
        {
            // present in all sources; chk inner
            NUnit.Framework.Assert.IsTrue(v_1.Has(0));
            for (int i_5 = 0; i_5 < Sources; ++i_5)
            {
                NUnit.Framework.Assert.IsTrue(((TupleWritable)v_1.Get(0)).Has(i_5));
            }
        }
        else
        {
            // should not be present in inner join
            NUnit.Framework.Assert.IsFalse(v_1.Has(0));
        }
    }
    r.Close();
    // Clean up the test directory.
    @base.GetFileSystem(job).Delete(@base, true);
}