/// <summary>
/// Wraps <paramref name="rr"/>, allocates its head key/value, resolves the key
/// comparator and then calls <c>Next()</c>.
/// </summary>
/// <param name="id">Index at which values will be inserted in the collector.</param>
/// <param name="rr">Reader being wrapped; supplies the key and value instances.</param>
/// <param name="cmpcl">
/// Comparator type to instantiate; when null the comparator is obtained from
/// <c>WritableComparator.Get</c> using the head key's type.
/// </param>
/// <param name="conf">Configuration to keep; a new one is created when null.</param>
/// <exception cref="System.IO.IOException"/>
internal WrappedRecordReader(int id, RecordReader<K, U> rr, Type cmpcl, Configuration conf)
{
    this.id = id;
    this.rr = rr;
    if (conf == null)
    {
        this.conf = new Configuration();
    }
    else
    {
        this.conf = conf;
    }

    // key at the top of this RR
    khead = rr.CreateKey();
    // value assoc with khead
    vhead = rr.CreateValue();

    try
    {
        cmp = (cmpcl != null)
            ? System.Activator.CreateInstance(cmpcl)
            : WritableComparator.Get(khead.GetType(), this.conf);
    }
    catch (InstantiationException instantiationFailure)
    {
        // Surface comparator construction problems as an IOException with the cause attached.
        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), instantiationFailure);
    }
    catch (MemberAccessException accessFailure)
    {
        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), accessFailure);
    }

    vjoin = new StreamBackedIterator<U>();
    Next();
}
/// <summary>
/// Fills <paramref name="o"/> with a key: the first <c>MinKeyLen</c> bytes come from
/// <c>prefix</c>, the remainder from randomly chosen dictionary words.
/// </summary>
/// <param name="o">Destination; resized to the generated key length and remembered as <c>lastKey</c>.</param>
private void FillKey(BytesWritable o)
{
    // Never generate a key shorter than the prefix region.
    int keyLength = Math.Max(keyLenRNG.NextInt(), MinKeyLen);
    o.SetSize(keyLength);

    // Fill everything after the prefix region with dictionary words, truncating the last one.
    int filled = MinKeyLen;
    while (filled < keyLength)
    {
        byte[] word = dict[random.Next(dict.Length)];
        int chunk = Math.Min(word.Length, keyLength - filled);
        System.Array.Copy(word, 0, o.Get(), filled, chunk);
        filled += chunk;
    }

    if (sorted)
    {
        // Compare only the parts after the prefix; bump the prefix when the new
        // body compares lower than the previous key's body.
        int order = WritableComparator.CompareBytes(
            lastKey.Get(), MinKeyLen, lastKey.GetSize() - MinKeyLen,
            o.Get(), MinKeyLen, o.GetSize() - MinKeyLen);
        if (order > 0)
        {
            IncrementPrefix();
        }
    }

    System.Array.Copy(prefix, 0, o.Get(), 0, MinKeyLen);
    lastKey.Set(o);
}
/// <summary>
/// Register an optimized comparator for a
/// <see cref="Record"/>
/// implementation.
/// </summary>
/// <param name="c">record class for which a raw comparator is provided</param>
/// <param name="comparator">Raw comparator instance for class c</param>
public static void Define(Type c, Org.Apache.Hadoop.Record.RecordComparator comparator)
{
    // Registration is serialized on the RecordComparator type object.
    lock (typeof(RecordComparator))
    {
        WritableComparator.Define(c, comparator);
    }
}
/// <summary>
/// Compares two serialized values in reverse order, ignoring the leading bytes
/// whose count is given by <c>WritableUtils.DecodeVIntSize</c> on each buffer's first byte.
/// </summary>
/// <returns>The negated result of the byte-wise comparison of the remaining bytes.</returns>
public virtual int Compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
{
    int skip1 = WritableUtils.DecodeVIntSize(b1[s1]);
    int skip2 = WritableUtils.DecodeVIntSize(b2[s2]);
    int natural = WritableComparator.CompareBytes(
        b1, s1 + skip1, l1 - skip1,
        b2, s2 + skip2, l2 - skip2);
    return -1 * natural;
}
/// <summary>
/// Compares the first <c>Length</c> bytes of <c>Bytes</c> with those of <paramref name="other"/>.
/// </summary>
/// <param name="other">Instance to compare against.</param>
/// <returns>0 when <paramref name="other"/> is this instance; otherwise the byte-wise comparison result.</returns>
/// <seealso cref="WritableComparator.CompareBytes(byte[], int, int, byte[], int, int)"/>
public virtual int CompareTo(BinaryComparable other)
{
    return (this == other)
        ? 0
        : WritableComparator.CompareBytes(Bytes, 0, Length, other.Bytes, 0, other.Length);
}
/// <summary>
/// Checks that the comparator returned by <c>WritableComparator.Get</c> for
/// <c>MyWritable</c> orders the two instances opposite to their own <c>CompareTo</c>.
/// </summary>
public virtual void TestBakedUserComparator()
{
    var a = new TestComparators.MyWritable(8, 8);
    var b = new TestComparators.MyWritable(7, 9);

    // The natural ordering puts a after b ...
    int naturalOrder = a.CompareTo(b);
    NUnit.Framework.Assert.IsTrue(naturalOrder > 0);

    // ... while the registered comparator puts a before b.
    int registeredOrder = WritableComparator.Get(typeof(TestComparators.MyWritable)).Compare(a, b);
    NUnit.Framework.Assert.IsTrue(registeredOrder < 0);
}
/// <summary>Returns true if the byte array begins with the specified prefix.</summary>
/// <param name="prefix">Buffer holding the prefix bytes.</param>
/// <param name="prefixlen">Number of leading bytes of <paramref name="prefix"/> to match.</param>
/// <param name="b">Array that is tested for the prefix.</param>
/// <returns>False when <paramref name="b"/> is shorter than <paramref name="prefixlen"/> or the bytes differ.</returns>
public static bool PrefixMatches(byte[] prefix, int prefixlen, byte[] b)
{
    return b.Length >= prefixlen
        && WritableComparator.CompareBytes(prefix, 0, prefixlen, b, 0, prefixlen) == 0;
}
/// <summary>
/// Emits one record-stats entry (byte count, record count of 1, checksum) per input
/// record under <c>this.key</c>. When <c>this.key</c> is <c>sortOutput</c> it first
/// verifies key type, key ordering and partition of the record.
/// </summary>
/// <param name="key">Record key.</param>
/// <param name="value">Record value.</param>
/// <param name="output">Collector receiving the record stats.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException">
/// Thrown on a key type mismatch, an out-of-order key, or a partition mismatch.
/// </exception>
public virtual void Map(WritableComparable key, Writable value,
    OutputCollector<IntWritable, SortValidator.RecordStatsChecker.RecordStatsWritable> output,
    Reporter reporter)
{
    // Set up rawKey and rawValue on the first call to 'map'
    bool firstCall = (recordId == -1);
    if (firstCall)
    {
        rawKey = CreateRaw(key.GetType());
        rawValue = CreateRaw(value.GetType());
    }
    ++recordId;

    if (this.key == sortOutput)
    {
        // Check if keys are 'sorted' if this record is from sort's output
        if (prevKey != null)
        {
            // Sanity check
            if (keyClass != key.GetType())
            {
                throw new IOException("Type mismatch in key: expected " + keyClass.FullName
                    + ", received " + key.GetType().FullName);
            }

            // Check if they were sorted correctly
            if (prevKey.CompareTo(key) > 0)
            {
                throw new IOException("The 'map-reduce' framework wrongly" + " classifed (" + prevKey
                    + ") > (" + key + ") " + "for record# " + recordId);
            }
            prevKey = key;
        }
        else
        {
            // First key seen: remember it and its type for later checks.
            prevKey = key;
            keyClass = prevKey.GetType();
        }

        // Check if the sorted output is 'partitioned' right
        int keyPartition = partitioner.GetPartition(key, value, noSortReducers);
        if (partition != keyPartition)
        {
            throw new IOException("Partitions do not match for record# " + recordId + " ! - '"
                + partition + "' v/s '" + keyPartition + "'");
        }
    }

    // Construct the record-stats and output (this.key, record-stats)
    byte[] keyBytes = rawKey.GetRawBytes(key);
    int keyBytesLen = rawKey.GetRawBytesLength(key);
    byte[] valueBytes = rawValue.GetRawBytes(value);
    int valueBytesLen = rawValue.GetRawBytesLength(value);

    int keyHash = WritableComparator.HashBytes(keyBytes, keyBytesLen);
    int valueHash = WritableComparator.HashBytes(valueBytes, valueBytesLen);
    int keyValueChecksum = keyHash ^ valueHash;

    var stats = new SortValidator.RecordStatsChecker.RecordStatsWritable(
        keyBytesLen + valueBytesLen, 1, keyValueChecksum);
    output.Collect(this.key, stats);
}
/// <summary>
/// Use (the specified slice of the array returned by)
/// <see cref="Org.Apache.Hadoop.IO.BinaryComparable.GetBytes()"/>
/// to partition.
/// </summary>
/// <param name="key">Key whose bytes are hashed.</param>
/// <param name="value">Not used for partitioning.</param>
/// <param name="numPartitions">Number of partitions; the result is taken modulo this value.</param>
/// <returns>The non-negative slice hash modulo <paramref name="numPartitions"/>.</returns>
public override int GetPartition(BinaryComparable key, V value, int numPartitions)
{
    int length = key.GetLength();

    // NOTE(review): a zero-length key makes the modulo below divide by zero —
    // confirm callers never pass an empty key.
    int leftIndex = (leftOffset + length) % length;
    int rightIndex = (rightOffset + length) % length;

    int sliceLength = rightIndex - leftIndex + 1;
    int hash = WritableComparator.HashBytes(key.GetBytes(), leftIndex, sliceLength);

    // Clear the sign bit so the modulo result is never negative.
    int nonNegative = hash & int.MaxValue;
    return nonNegative % numPartitions;
}
/// <summary>
/// Checks the comparator obtained for <c>BooleanWritable</c> against the serialized
/// forms of <c>true</c> and <c>false</c>: equal inputs give 0, true-vs-false gives 1
/// and false-vs-true gives -1.
/// </summary>
public virtual void TestCompareUnequalWritables()
{
    var bTrue = WriteWritable(new BooleanWritable(true));
    var bFalse = WriteWritable(new BooleanWritable(false));
    WritableComparator writableComparator = WritableComparator.Get(typeof(BooleanWritable));

    // Equal inputs (each case asserted once; the duplicated true/true check was redundant).
    0.ShouldEqual(Compare(writableComparator, bTrue, bTrue));
    0.ShouldEqual(Compare(writableComparator, bFalse, bFalse));

    // Unequal inputs, in both directions.
    1.ShouldEqual(Compare(writableComparator, bTrue, bFalse));
    (-1).ShouldEqual(Compare(writableComparator, bFalse, bTrue));
}
/// <summary>Add a RecordReader to this collection.</summary>
/// <remarks>
/// The id() of a RecordReader determines where in the Tuple its
/// entry will appear. Adding RecordReaders with the same id has
/// undefined behavior.
/// </remarks>
/// <param name="rr">Reader to register; it is queued only when it has a next record.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Add(ComposableRecordReader<K, V> rr)
{
    kids[rr.Id()] = rr;

    // The comparator and queue are created on first use, from the first reader's key type.
    if (q == null)
    {
        cmp = WritableComparator.Get(rr.CreateKey().GetType(), conf);
        q = new PriorityQueue<ComposableRecordReader<K, object>>(3, new _IComparer_136(this));
    }

    if (!rr.HasNext())
    {
        return;
    }
    q.AddItem(rr);
}
/// <summary>
/// Create a RecordReader with <tt>capacity</tt> children to position
/// <tt>id</tt> in the parent reader.
/// </summary>
/// <remarks>
/// The id of a root CompositeRecordReader is -1 by convention, but relying
/// on this is not recommended.
/// </remarks>
/// <param name="id">Position of this reader in its parent.</param>
/// <param name="capacity">Number of child readers; asserted to be positive.</param>
/// <param name="cmpcl">Comparator type; when non-null the comparator and queue are created here.</param>
/// <exception cref="System.IO.IOException"/>
public CompositeRecordReader(int id, int capacity, Type cmpcl)
{
    System.Diagnostics.Debug.Assert(capacity > 0, "Invalid capacity");
    this.id = id;

    if (cmpcl != null)
    {
        cmp = ReflectionUtils.NewInstance(cmpcl, null);
        q = new PriorityQueue<ComposableRecordReader<K, object>>(3, new _IComparer_78(this));
    }

    jc = new CompositeRecordReader.JoinCollector(this, capacity);
    // Generic array assignment
    kids = new ComposableRecordReader[capacity];
}
/// <summary>
/// Initializes the wrapped reader, reads the first key/value and, unless the reader
/// is empty, records the key and value types and resolves the comparator.
/// </summary>
/// <param name="split">Split handed to the wrapped reader.</param>
/// <param name="context">Task context; also the source of the configuration.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public override void Initialize(InputSplit split, TaskAttemptContext context)
{
    rr.Initialize(split, context);
    conf = context.GetConfiguration();
    NextKeyValue();

    if (empty)
    {
        return;
    }

    keyclass = key.GetType().AsSubclass<WritableComparable>();
    valueclass = value.GetType();

    // Keep a comparator that was already set; otherwise derive one from the key class.
    if (cmp == null)
    {
        cmp = WritableComparator.Get(keyclass, conf);
    }
}
/// <summary>Returns true iff <code>o</code> is a UTF8 with the same contents.</summary>
/// <param name="o">Object to compare with.</param>
public override bool Equals(object o)
{
    if (!(o is UTF8))
    {
        return false;
    }

    UTF8 that = (UTF8)o;

    // Different lengths can never match; only then compare the bytes.
    return this.length == that.length
        && WritableComparator.CompareBytes(bytes, 0, length, that.bytes, 0, that.length) == 0;
}
/// <summary>For a given RecordReader rr, occupy position id in collector.</summary>
/// <param name="id">Position to occupy.</param>
/// <param name="rr">Reader being wrapped.</param>
/// <param name="cmpcl">Comparator type to instantiate; no comparator is created when null.</param>
/// <exception cref="System.IO.IOException">Thrown when the comparator cannot be instantiated.</exception>
/// <exception cref="System.Exception"/>
internal WrappedRecordReader(int id, RecordReader<K, U> rr, Type cmpcl)
{
    this.id = id;
    this.rr = rr;

    if (cmpcl != null)
    {
        try
        {
            this.cmp = System.Activator.CreateInstance(cmpcl);
        }
        catch (InstantiationException instantiationFailure)
        {
            throw new IOException(instantiationFailure);
        }
        catch (MemberAccessException accessFailure)
        {
            throw new IOException(accessFailure);
        }
    }

    vjoin = new StreamBackedIterator<U>();
}
/// <summary>
/// Instead of filling the JoinCollector with iterators from all
/// data sources, fill only the rightmost for this key.
/// </summary>
/// <remarks>
/// This not only saves space by discarding the other sources, but
/// it also emits the number of key-value pairs in the preferred
/// RecordReader instead of repeating that stream n times, where
/// n is the cardinality of the cross product of the discarded
/// streams for the given key.
/// </remarks>
/// <param name="iterkey">Key instance that receives the head key and is then matched against the queue.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
protected internal override void FillJoinCollector(K iterkey)
{
    PriorityQueue<ComposableRecordReader<K, object>> queue = GetRecordReaderQueue();
    if (queue == null || queue.IsEmpty())
    {
        return;
    }

    // Position (within 'matching') of the reader with the highest id seen so far.
    int highpos = -1;
    AList<ComposableRecordReader<K, object>> matching =
        new AList<ComposableRecordReader<K, object>>(kids.Length);

    queue.Peek().Key(iterkey);
    WritableComparator comparator = GetComparator();

    // Pull every reader whose head key equals iterkey off the queue.
    while (0 == comparator.Compare(queue.Peek().Key(), iterkey))
    {
        ComposableRecordReader<K, object> reader = queue.Poll();
        if (-1 == highpos || matching[highpos].Id() < reader.Id())
        {
            highpos = matching.Count;
        }
        matching.AddItem(reader);
        if (queue.IsEmpty())
        {
            break;
        }
    }

    // Only the highest-id reader feeds the collector; the rest skip past this key.
    ComposableRecordReader<K, object> preferred = matching.Remove(highpos);
    preferred.Accept(jc, iterkey);
    foreach (ComposableRecordReader<K, object> skipped in matching)
    {
        skipped.Skip(iterkey);
    }

    // Re-queue every reader that still has records.
    matching.AddItem(preferred);
    foreach (ComposableRecordReader<K, object> candidate in matching)
    {
        if (candidate.HasNext())
        {
            queue.AddItem(candidate);
        }
    }
}
/// <summary>
/// Round-trips <paramref name="l"/> through the reverse-ordered long encoding (at offset 0
/// and at offset 5 of a larger buffer) and checks byte ordering against its neighbours.
/// </summary>
/// <param name="l">Value under test.</param>
private static void TestEncoding(long l)
{
    byte[] encoded = GenericObjectMapper.WriteReverseOrderedLong(l);
    NUnit.Framework.Assert.AreEqual("error decoding", l,
        GenericObjectMapper.ReadReverseOrderedLong(encoded, 0));

    // Decode again from the middle of a larger buffer.
    byte[] padded = new byte[16];
    System.Array.Copy(encoded, 0, padded, 5, 8);
    NUnit.Framework.Assert.AreEqual("error decoding at offset", l,
        GenericObjectMapper.ReadReverseOrderedLong(padded, 5));

    if (l > long.MinValue)
    {
        // The encoding of l - 1 is expected to compare greater than that of l.
        byte[] below = GenericObjectMapper.WriteReverseOrderedLong(l - 1);
        NUnit.Framework.Assert.AreEqual("error preserving ordering", 1,
            WritableComparator.CompareBytes(below, 0, below.Length, encoded, 0, encoded.Length));
    }

    if (l < long.MaxValue)
    {
        // The encoding of l is expected to compare greater than that of l + 1.
        byte[] above = GenericObjectMapper.WriteReverseOrderedLong(l + 1);
        NUnit.Framework.Assert.AreEqual("error preserving ordering", 1,
            WritableComparator.CompareBytes(encoded, 0, encoded.Length, above, 0, above.Length));
    }
}
/// <summary>
/// Initializes every child reader with its part of the composite split and queues
/// those that have records, creating the key class, comparator and queue on first need.
/// </summary>
/// <param name="split">Cast to <c>CompositeInputSplit</c>; child i receives <c>Get(i)</c>.</param>
/// <param name="context">Task context passed to each child.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.InvalidCastException">Thrown when a child's key type differs from the recorded key class.</exception>
public override void Initialize(InputSplit split, TaskAttemptContext context)
{
    if (kids == null)
    {
        return;
    }

    for (int i = 0; i < kids.Length; ++i)
    {
        var kid = kids[i];
        kid.Initialize(((CompositeInputSplit)split).Get(i), context);

        // A child without a key takes no further part in setup.
        if (kid.Key() == null)
        {
            continue;
        }

        // get keyclass
        if (keyclass == null)
        {
            keyclass = kid.CreateKey().GetType().AsSubclass<WritableComparable>();
        }

        // create priority queue
        if (q == null)
        {
            cmp = WritableComparator.Get(keyclass, conf);
            q = new PriorityQueue<ComposableRecordReader<K, object>>(3, new _IComparer_114(this));
        }

        // Explicit check for key class agreement
        if (!keyclass.Equals(kid.Key().GetType()))
        {
            throw new InvalidCastException("Child key classes fail to agree");
        }

        // add the kid to priority queue if it has any elements
        if (kid.HasNext())
        {
            q.AddItem(kid);
        }
    }
}
/// <summary>
/// Builds <paramref name="j"/> random <c>Text</c> records serialized back-to-back into one
/// buffer, recording each record's start offset and string form.
/// </summary>
/// <param name="j">Number of records to generate.</param>
/// <exception cref="System.IO.IOException"/>
public WritableSortable(int j)
{
    // Remember the seed that is actually installed in the generator.
    seed = r.NextLong();
    r.SetSeed(seed);

    Text record = new Text();
    StringBuilder scratch = new StringBuilder();
    indices = new int[j];
    offsets = new int[j];
    check = new string[j];
    DataOutputBuffer buffer = new DataOutputBuffer();

    for (int slot = 0; slot < j; ++slot)
    {
        indices[slot] = slot;
        // Offset is taken before the record is written, i.e. where it starts.
        offsets[slot] = buffer.GetLength();
        GenRandom(record, r.Next(15) + 1, scratch);
        record.Write(buffer);
        check[slot] = record.ToString();
    }

    eob = buffer.GetLength();
    bytes = buffer.GetData();
    comparator = WritableComparator.Get(typeof(Org.Apache.Hadoop.IO.Text));
}
/// <summary>Compare two UTF8s.</summary>
/// <param name="o">Instance to compare against.</param>
/// <returns>The byte-wise comparison of the first <c>length</c> bytes of each instance.</returns>
public virtual int CompareTo(UTF8 o)
{
    int order = WritableComparator.CompareBytes(bytes, 0, length, o.bytes, 0, o.length);
    return order;
}
/// <summary>Compares two byte arrays over their full lengths.</summary>
/// <param name="o1">First array.</param>
/// <param name="o2">Second array.</param>
/// <returns>The result of <c>WritableComparator.CompareBytes</c> on the complete arrays.</returns>
public virtual int Compare(byte[] o1, byte[] o2)
{
    int order = WritableComparator.CompareBytes(o1, 0, o1.Length, o2, 0, o2.Length);
    return order;
}
/// <summary>
/// Validates one reduce group: rebuilds the expected key, checks every value against
/// the expected fixed bytes, forwards each pair to <paramref name="out"/>, and finally
/// checks the record count and the sum of the values' first bytes.
/// </summary>
/// <param name="key">Group key; a length of 25 marks a tagged record whose first byte is the tag.</param>
/// <param name="values">Values of the group.</param>
/// <param name="out">Collector that receives every (key, value) pair unchanged.</param>
/// <param name="reporter">Not used by this method.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void Reduce(Org.Apache.Hadoop.IO.Text key, IEnumerator<Org.Apache.Hadoop.IO.Text> values,
    OutputCollector<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> @out, Reporter reporter)
{
    int vc = 0;
    int vlen;
    int preRec = nRec;
    int vcCheck;
    int recCheck;
    ((StringBuilder)fmt.Out()).Length = keylen;
    if (25 == key.GetLength())
    {
        // tagged record: expect only 1 record
        recCheck = 1;
        switch ((char)key.GetBytes()[0])
        {
            case 'A':
            {
                vlen = GetValLen(++aKey, nMaps) - 128;
                vcCheck = aKey; // expect eq id
                break;
            }

            case 'B':
            {
                vlen = GetValLen(++bKey, nMaps);
                vcCheck = bKey; // expect eq id
                break;
            }

            default:
            {
                vlen = vcCheck = -1;
                // Report the byte that was actually switched on (index 0), not the last key byte.
                Fail("Unexpected tag on record: " + ((char)key.GetBytes()[0]));
                break;
            }
        }
        kb.Set((char)key.GetBytes()[0] + fmt.Format(tagfmt, vcCheck).ToString());
    }
    else
    {
        kb.Set(fmt.Format(tagfmt, ++nKey).ToString());
        vlen = 1000;
        // expect 1 rec per map
        recCheck = nMaps;
        // expect eq sum(id)
        vcCheck = (int)(((uint)(nMaps * (nMaps - 1))) >> 1);
    }
    NUnit.Framework.Assert.AreEqual(kb, key);

    while (values.HasNext())
    {
        Org.Apache.Hadoop.IO.Text val = values.Next();
        // increment vc by map ID assoc w/ val
        vc += val.GetBytes()[0];
        // verify that all the fixed characters 'V' match
        NUnit.Framework.Assert.AreEqual(0, WritableComparator.CompareBytes(
            vb.GetBytes(), 1, vlen - 1,
            val.GetBytes(), 1, val.GetLength() - 1));
        @out.Collect(key, val);
        ++nRec;
    }

    NUnit.Framework.Assert.AreEqual("Bad rec count for " + key, recCheck, nRec - preRec);
    NUnit.Framework.Assert.AreEqual("Bad rec group for " + key, vcCheck, vc);
}
/// <summary>Compares two byte ranges by delegating to <c>WritableComparator.CompareBytes</c>.</summary>
/// <param name="b1">First buffer.</param>
/// <param name="s1">Start offset in <paramref name="b1"/>.</param>
/// <param name="l1">Number of bytes to use from <paramref name="b1"/>.</param>
/// <param name="b2">Second buffer.</param>
/// <param name="s2">Start offset in <paramref name="b2"/>.</param>
/// <param name="l2">Number of bytes to use from <paramref name="b2"/>.</param>
public virtual int Compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
{
    int order = WritableComparator.CompareBytes(b1, s1, l1, b2, s2, l2);
    return order;
}
/// <summary>Registers <c>IntWritable.Comparator</c> as the comparator for <c>IntWritable</c>.</summary>
static IntWritable()
{
    var rawComparator = new IntWritable.Comparator();
    WritableComparator.Define(typeof(IntWritable), rawComparator);
}
/// <summary>Registers <c>UTF8.Comparator</c> as the comparator for <c>UTF8</c>.</summary>
static UTF8()
{
    var rawComparator = new UTF8.Comparator();
    WritableComparator.Define(typeof(UTF8), rawComparator);
}
/// <summary>Hashes the first <c>length</c> bytes of <c>bytes</c> via <c>WritableComparator.HashBytes</c>.</summary>
public override int GetHashCode()
{
    int hash = WritableComparator.HashBytes(bytes, length);
    return hash;
}
/// <summary>Create a set naming the element comparator and compression type.</summary>
/// <param name="conf">Configuration forwarded to the base writer.</param>
/// <param name="fs">Not forwarded to the base constructor.</param>
/// <param name="dirName">Directory name, wrapped in a <c>Path</c>.</param>
/// <param name="comparator">Comparator for the set's elements.</param>
/// <param name="compress">Compression type for the underlying file.</param>
/// <exception cref="System.IO.IOException"/>
public Writer(
    Configuration conf,
    FileSystem fs,
    string dirName,
    WritableComparator comparator,
    SequenceFile.CompressionType compress)
    : base(
        conf,
        new Path(dirName),
        Comparator(comparator),
        ValueClass(typeof(NullWritable)), // values are always NullWritable
        Compression(compress))
{
}
/// <summary>Construct a set reader for the named set using the named comparator.</summary>
/// <param name="fs">Not forwarded to the base constructor.</param>
/// <param name="dirName">Directory name, wrapped in a <c>Path</c>.</param>
/// <param name="comparator">Comparator for the set's elements.</param>
/// <param name="conf">Configuration forwarded to the base reader.</param>
/// <exception cref="System.IO.IOException"/>
public Reader(
    FileSystem fs,
    string dirName,
    WritableComparator comparator,
    Configuration conf)
    : base(new Path(dirName), conf, Comparator(comparator))
{
}
/// <summary>Registers a new <c>Comparator</c> as the comparator for <c>BytesWritable</c>.</summary>
static BytesWritable()
{
    var rawComparator = new Comparator();
    WritableComparator.Define(typeof(BytesWritable), rawComparator);
}
/// <summary>Create a set naming the element class and compression type.</summary>
/// <param name="conf">Configuration; also used to look up the comparator for <paramref name="keyClass"/>.</param>
/// <param name="fs">Passed through to the comparator-based constructor.</param>
/// <param name="dirName">Directory name of the set.</param>
/// <param name="keyClass">Element type whose comparator is obtained from <c>WritableComparator.Get</c>.</param>
/// <param name="compress">Compression type for the underlying file.</param>
/// <exception cref="System.IO.IOException"/>
public Writer(
    Configuration conf,
    FileSystem fs,
    string dirName,
    Type keyClass,
    SequenceFile.CompressionType compress)
    : this(conf, fs, dirName, WritableComparator.Get(keyClass, conf), compress)
{
}