/// <exception cref="System.IO.IOException"/>
public Segment(Configuration conf, FileSystem fs, Path file, CompressionCodec codec,
    bool preserve, Counters.Counter mergedMapOutputsCounter, long rawDataLength)
    : this(conf, fs, file, 0, fs.GetFileStatus(file).GetLen(), codec, preserve,
        mergedMapOutputsCounter)
{
    this.rawDataLength = rawDataLength;
}
/// <exception cref="System.IO.IOException"/>
public virtual RawKeyValueIterator Merge(Type keyClass, Type valueClass, int factor,
    Path tmpDir, Counters.Counter readsCounter, Counters.Counter writesCounter,
    Progress mergePhase)
{
    return Merge(keyClass, valueClass, factor, 0, tmpDir, readsCounter, writesCounter,
        mergePhase);
}
/// <summary>Construct an IFile Reader.</summary>
/// <param name="conf">Configuration File</param>
/// <param name="in">The input stream</param>
/// <param name="length">
/// Length of the data in the stream, including the checksum bytes.
/// </param>
/// <param name="codec">codec</param>
/// <param name="readsCounter">Counter for records read from disk</param>
/// <exception cref="System.IO.IOException"/>
public Reader(Configuration conf, FSDataInputStream @in, long length, CompressionCodec codec,
    Counters.Counter readsCounter)
{
    // Count records read from disk
    readRecordsCounter = readsCounter;
    checksumIn = new IFileInputStream(@in, length, conf);
    if (codec != null)
    {
        decompressor = CodecPool.GetDecompressor(codec);
        if (decompressor != null)
        {
            // Possibly decompressed stream that we read
            this.@in = codec.CreateInputStream(checksumIn, decompressor);
        }
        else
        {
            Log.Warn("Could not obtain decompressor from CodecPool");
            this.@in = checksumIn;
        }
    }
    else
    {
        this.@in = checksumIn;
    }
    this.dataIn = new DataInputStream(this.@in);
    this.fileLength = length;
    if (conf != null)
    {
        bufferSize = conf.GetInt("io.file.buffer.size", DefaultBufferSize);
    }
}
public virtual void TestCounterValue()
{
    Counters counters = new Counters();
    int NumberTests = 100;
    int NumberInc = 10;
    Random rand = new Random();
    for (int i = 0; i < NumberTests; i++)
    {
        long initValue = rand.Next();
        long expectedValue = initValue;
        Counters.Counter counter = counters.FindCounter("foo", "bar");
        counter.SetValue(initValue);
        NUnit.Framework.Assert.AreEqual("Counter value is not initialized correctly",
            expectedValue, counter.GetValue());
        for (int j = 0; j < NumberInc; j++)
        {
            int incValue = rand.Next();
            counter.Increment(incValue);
            expectedValue += incValue;
            NUnit.Framework.Assert.AreEqual("Counter value is not incremented correctly",
                expectedValue, counter.GetValue());
        }
        expectedValue = rand.Next();
        counter.SetValue(expectedValue);
        NUnit.Framework.Assert.AreEqual("Counter value is not set correctly",
            expectedValue, counter.GetValue());
    }
}
public Context(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
    TaskUmbilicalProtocol umbilical, LocalDirAllocator localDirAllocator, Reporter reporter,
    CompressionCodec codec, Type combinerClass, Task.CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter, Counters.Counter reduceCombineInputCounter,
    Counters.Counter shuffledMapsCounter, Counters.Counter reduceShuffleBytes,
    Counters.Counter failedShuffleCounter, Counters.Counter mergedMapOutputsCounter,
    TaskStatus status, Progress copyPhase, Progress mergePhase, Task reduceTask,
    MapOutputFile mapOutputFile, IDictionary<TaskAttemptID, MapOutputFile> localMapFiles)
{
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localFS = localFS;
    this.umbilical = umbilical;
    this.localDirAllocator = localDirAllocator;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.shuffledMapsCounter = shuffledMapsCounter;
    this.reduceShuffleBytes = reduceShuffleBytes;
    this.failedShuffleCounter = failedShuffleCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.status = status;
    this.copyPhase = copyPhase;
    this.mergePhase = mergePhase;
    this.reduceTask = reduceTask;
    this.mapOutputFile = mapOutputFile;
    this.localMapFiles = localMapFiles;
}
public Segment(IFile.Reader<K, V> reader, bool preserve, Counters.Counter mapOutputsCounter)
{
    this.reader = reader;
    this.preserve = preserve;
    this.segmentLength = reader.GetLength();
    this.mapOutputsCounter = mapOutputsCounter;
}
/// <exception cref="System.IO.IOException"/>
public static RawKeyValueIterator Merge<K, V>(Configuration conf, FileSystem fs,
    IList<Merger.Segment<K, V>> segments, int mergeFactor, int inMemSegments, Path tmpDir,
    RawComparator<K> comparator, Progressable reporter, bool sortSegments,
    Counters.Counter readsCounter, Counters.Counter writesCounter, Progress mergePhase)
{
    System.Type keyClass = typeof(K);
    System.Type valueClass = typeof(V);
    return new Merger.MergeQueue<K, V>(conf, fs, segments, comparator, reporter, sortSegments,
        TaskType.Reduce).Merge(keyClass, valueClass, mergeFactor, inMemSegments, tmpDir,
        readsCounter, writesCounter, mergePhase);
}
/// <exception cref="System.IO.IOException"/>
public static RawKeyValueIterator Merge<K, V>(Configuration conf, FileSystem fs,
    CompressionCodec codec, Path[] inputs, bool deleteInputs, int mergeFactor, Path tmpDir,
    RawComparator<K> comparator, Progressable reporter, Counters.Counter readsCounter,
    Counters.Counter writesCounter, Progress mergePhase)
{
    System.Type keyClass = typeof(K);
    System.Type valueClass = typeof(V);
    return new Merger.MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator,
        reporter, null, TaskType.Reduce).Merge(keyClass, valueClass, mergeFactor, tmpDir,
        readsCounter, writesCounter, mergePhase);
}
/// <exception cref="System.IO.IOException"/>
public Segment(Configuration conf, FileSystem fs, Path file, long segmentOffset,
    long segmentLength, CompressionCodec codec, bool preserve,
    Counters.Counter mergedMapOutputsCounter)
{
    this.conf = conf;
    this.fs = fs;
    this.file = file;
    this.codec = codec;
    this.preserve = preserve;
    this.segmentOffset = segmentOffset;
    this.segmentLength = segmentLength;
    this.mapOutputsCounter = mergedMapOutputsCounter;
}
/// <exception cref="System.IO.IOException"/>
internal virtual void Init(Counters.Counter readsCounter)
{
    if (reader == null)
    {
        FSDataInputStream @in = fs.Open(file);
        @in.Seek(segmentOffset);
        @in = CryptoUtils.WrapIfNecessary(conf, @in);
        reader = new IFile.Reader<K, V>(conf, @in, segmentLength - CryptoUtils.CryptoPadding(conf),
            codec, readsCounter);
    }
    if (mapOutputsCounter != null)
    {
        mapOutputsCounter.Increment(1);
    }
}
public override bool Equals(object genericRight)
{
    lock (this)
    {
        if (genericRight is Counters.Counter)
        {
            lock (genericRight)
            {
                Counters.Counter right = (Counters.Counter)genericRight;
                return GetName().Equals(right.GetName())
                    && GetDisplayName().Equals(right.GetDisplayName())
                    && GetValue() == right.GetValue();
            }
        }
        return false;
    }
}
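To make the equality contract above concrete, here is a small illustrative check built only from calls that appear elsewhere in this listing (the group and counter names are arbitrary): two counters compare equal only when their name, display name, and current value all match.

// Illustrative only: counters from two independent Counters instances.
Counters c1 = new Counters();
Counters c2 = new Counters();
Counters.Counter a = c1.FindCounter("group", "records");
Counters.Counter b = c2.FindCounter("group", "records");
a.SetValue(42);
b.SetValue(42);
bool same = a.Equals(b);    // expected true: same name, display name, and value
b.Increment(1);
bool differ = a.Equals(b);  // expected false: the values now differ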
/// <exception cref="System.IO.IOException"/>
public SkippingReduceValuesIterator(ReduceTask _enclosing, RawKeyValueIterator @in,
    RawComparator<KEY> comparator, Type keyClass, Type valClass, Configuration conf,
    Task.TaskReporter reporter, TaskUmbilicalProtocol umbilical)
    : base(_enclosing)
{
    this._enclosing = _enclosing;
    this.umbilical = umbilical;
    this.skipGroupCounter = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedGroups);
    this.skipRecCounter = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedRecords);
    this.toWriteSkipRecs = this._enclosing.ToWriteSkipRecs()
        && SkipBadRecords.GetSkipOutputPath(conf) != null;
    this.keyClass = keyClass;
    this.valClass = valClass;
    this.reporter = reporter;
    this.skipIt = this._enclosing.GetSkipRanges().SkipRangeIterator();
    this.MayBeSkip();
}
/// <exception cref="System.IO.IOException"/>
public OldTrackingRecordWriter(ReduceTask reduce, JobConf job, Task.TaskReporter reporter,
    string finalName)
{
    this.reduceOutputCounter = reduce.reduceOutputCounter;
    this.fileOutputByteCounter = reduce.fileOutputByteCounter;
    IList<FileSystem.Statistics> matchedStats = null;
    if (job.GetOutputFormat() is FileOutputFormat)
    {
        matchedStats = GetFsStatistics(FileOutputFormat.GetOutputPath(job), job);
    }
    fsStats = matchedStats;
    FileSystem fs = FileSystem.Get(job);
    long bytesOutPrev = GetOutputBytes(fsStats);
    this.real = job.GetOutputFormat().GetRecordWriter(fs, job, finalName, reporter);
    long bytesOutCurr = GetOutputBytes(fsStats);
    fileOutputByteCounter.Increment(bytesOutCurr - bytesOutPrev);
}
/// <exception cref="System.IO.IOException"/>
public Writer(Configuration conf, FSDataOutputStream @out, Type keyClass, Type valueClass,
    CompressionCodec codec, Counters.Counter writesCounter, bool ownOutputStream)
{
    this.writtenRecordsCounter = writesCounter;
    this.checksumOut = new IFileOutputStream(@out);
    this.rawOut = @out;
    this.start = this.rawOut.GetPos();
    if (codec != null)
    {
        this.compressor = CodecPool.GetCompressor(codec);
        if (this.compressor != null)
        {
            this.compressor.Reset();
            this.compressedOut = codec.CreateOutputStream(checksumOut, compressor);
            this.@out = new FSDataOutputStream(this.compressedOut, null);
            this.compressOutput = true;
        }
        else
        {
            Log.Warn("Could not obtain compressor from CodecPool");
            this.@out = new FSDataOutputStream(checksumOut, null);
        }
    }
    else
    {
        this.@out = new FSDataOutputStream(checksumOut, null);
    }
    this.keyClass = keyClass;
    this.valueClass = valueClass;
    if (keyClass != null)
    {
        SerializationFactory serializationFactory = new SerializationFactory(conf);
        this.keySerializer = serializationFactory.GetSerializer(keyClass);
        this.keySerializer.Open(buffer);
        this.valueSerializer = serializationFactory.GetSerializer(valueClass);
        this.valueSerializer.Open(buffer);
    }
    this.ownOutputStream = ownOutputStream;
}
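For context, a minimal sketch of wiring a writes counter into this Writer. It is not taken from this codebase: the counter group/name, the variables assumed to be in scope (conf, fs, outputFile, codec, keyClass, valueClass, key, value, and the K/V type parameters), and the Append call (named after the upstream Hadoop IFile.Writer API) are all assumptions.

// Hypothetical sketch: counting records spilled to an IFile.
// Assumes conf, fs, outputFile, codec, keyClass, valueClass, key and value are in scope.
Counters counters = new Counters();
Counters.Counter writesCounter = counters.FindCounter("example", "ifileWrites");  // illustrative names
FSDataOutputStream @out = fs.Create(outputFile);
IFile.Writer<K, V> writer = new IFile.Writer<K, V>(conf, @out, keyClass, valueClass,
    codec, writesCounter, true);
writer.Append(key, value);  // per-record append; method name assumed from the upstream Hadoop IFile.Writer
writer.Close();             // upstream, Close() adds the number of records written to the counter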
/// <exception cref="System.IO.IOException"/>
internal virtual RawKeyValueIterator Merge(Type keyClass, Type valueClass, int factor,
    int inMem, Path tmpDir, Counters.Counter readsCounter, Counters.Counter writesCounter,
    Progress mergePhase)
{
    Log.Info("Merging " + segments.Count + " sorted segments");
    /*
     * If there are in-memory segments, they come first in the segments
     * list, followed by the sorted disk segments. Otherwise (if there are only
     * disk segments), they are sorted segments if there are more than
     * 'factor' segments in the segments list.
     */
    int numSegments = segments.Count;
    int origFactor = factor;
    int passNo = 1;
    if (mergePhase != null)
    {
        mergeProgress = mergePhase;
    }
    long totalBytes = ComputeBytesInMerges(factor, inMem);
    if (totalBytes != 0)
    {
        progPerByte = 1.0f / (float)totalBytes;
    }
    do
    {
        // Create the merge streams from the sorted map built in the constructor
        // and dump the final output to a file.
        // Get the factor for this pass of the merge. We assume in-memory segments
        // are the first entries in the segment list and that the pass factor
        // doesn't apply to them.
        factor = GetPassFactor(factor, passNo, numSegments - inMem);
        if (1 == passNo)
        {
            factor += inMem;
        }
        IList<Merger.Segment<K, V>> segmentsToMerge = new AList<Merger.Segment<K, V>>();
        int segmentsConsidered = 0;
        int numSegmentsToConsider = factor;
        long startBytes = 0; // starting bytes of segments of this merge
        while (true)
        {
            // Extract the smallest 'factor' number of segments and
            // call cleanup on the empty segments (no key/value data).
            IList<Merger.Segment<K, V>> mStream = GetSegmentDescriptors(numSegmentsToConsider);
            foreach (Merger.Segment<K, V> segment in mStream)
            {
                // Initialize the segment at the last possible moment;
                // this helps ensure we don't use buffers until we need them.
                segment.Init(readsCounter);
                long startPos = segment.GetReader().bytesRead;
                bool hasNext = segment.NextRawKey();
                long endPos = segment.GetReader().bytesRead;
                if (hasNext)
                {
                    startBytes += endPos - startPos;
                    segmentsToMerge.AddItem(segment);
                    segmentsConsidered++;
                }
                else
                {
                    // We ignore this segment for the merge.
                    segment.Close();
                    numSegments--;
                }
            }
            // If we have the desired number of segments,
            // or have looked at all available segments, we break.
            if (segmentsConsidered == factor || segments.Count == 0)
            {
                break;
            }
            numSegmentsToConsider = factor - segmentsConsidered;
        }
        // Feed the streams to the priority queue.
        Initialize(segmentsToMerge.Count);
        Clear();
        foreach (Merger.Segment<K, V> segment_1 in segmentsToMerge)
        {
            Put(segment_1);
        }
        // If few enough segments remain, just return the iterator;
        // otherwise do another single-level merge.
        if (numSegments <= factor)
        {
            if (!includeFinalMerge)
            {
                // For the reduce task:
                // reset totalBytesProcessed and recalculate totalBytes from the
                // remaining segments to track the progress of the final merge.
                // The final merge is counted as progress of the reducePhase,
                // the 3rd phase of the reduce task.
                totalBytesProcessed = 0;
                totalBytes = 0;
                for (int i = 0; i < segmentsToMerge.Count; i++)
                {
                    totalBytes += segmentsToMerge[i].GetRawDataLength();
                }
            }
            if (totalBytes != 0)
            {
                // being paranoid
                progPerByte = 1.0f / (float)totalBytes;
            }
            totalBytesProcessed += startBytes;
            if (totalBytes != 0)
            {
                mergeProgress.Set(totalBytesProcessed * progPerByte);
            }
            else
            {
                // Last pass and no segments left - we're done.
                mergeProgress.Set(1.0f);
            }
            Log.Info("Down to the last merge-pass, with " + numSegments +
                " segments left of total size: " + (totalBytes - totalBytesProcessed) + " bytes");
            return this;
        }
        else
        {
            Log.Info("Merging " + segmentsToMerge.Count + " intermediate segments out of a total of " +
                (segments.Count + segmentsToMerge.Count));
            long bytesProcessedInPrevMerges = totalBytesProcessed;
            totalBytesProcessed += startBytes;
            // We want to spread the creation of temp files over multiple disks if
            // available, under the space constraints.
            long approxOutputSize = 0;
            foreach (Merger.Segment<K, V> s in segmentsToMerge)
            {
                approxOutputSize += s.GetLength() + ChecksumFileSystem.GetApproxChkSumLength(s.GetLength());
            }
            Path tmpFilename = new Path(tmpDir, "intermediate").Suffix("." + passNo);
            Path outputFile = lDirAlloc.GetLocalPathForWrite(tmpFilename.ToString(), approxOutputSize, conf);
            FSDataOutputStream @out = fs.Create(outputFile);
            @out = CryptoUtils.WrapIfNecessary(conf, @out);
            IFile.Writer<K, V> writer = new IFile.Writer<K, V>(conf, @out, keyClass, valueClass,
                codec, writesCounter, true);
            WriteFile(this, writer, reporter, conf);
            writer.Close();
            // We finished one single-level merge; now clean up the priority queue.
            this.Close();
            // Add the newly created segment to the list of segments to be merged.
            Merger.Segment<K, V> tempSegment = new Merger.Segment<K, V>(conf, fs, outputFile, codec, false);
            // Insert the new merged segment into the sorted list.
            int pos = Sharpen.Collections.BinarySearch(segments, tempSegment, segmentComparator);
            if (pos < 0)
            {
                // Binary search failed, so the position to insert at is -pos - 1.
                pos = -pos - 1;
            }
            segments.Add(pos, tempSegment);
            numSegments = segments.Count;
            // Subtract the difference between the expected size of the new segment
            // (inputBytesOfThisMerge) and its actual size from totalBytes. Expected and
            // actual size will match (almost) if the combiner is not called during the merge.
            long inputBytesOfThisMerge = totalBytesProcessed - bytesProcessedInPrevMerges;
            totalBytes -= inputBytesOfThisMerge - tempSegment.GetRawDataLength();
            if (totalBytes != 0)
            {
                progPerByte = 1.0f / (float)totalBytes;
            }
            passNo++;
        }
        // We only care about the merge factor of the first pass, so reset the
        // factor to what it originally was.
        factor = origFactor;
    }
    while (true);
}
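GetPassFactor itself is not shown in this listing. As a hedged sketch of the rule it implements in the upstream Hadoop Merger (which this port appears to mirror; the exact signature here is an assumption), the first pass merges just enough segments that every subsequent pass can merge exactly 'factor' segments:

// Sketch of the first-pass factor rule, after the upstream Hadoop Merger.getPassFactor.
private int GetPassFactor(int factor, int passNo, int numSegments)
{
    // Only the first pass is special, and only when a multi-pass merge is unavoidable.
    if (passNo > 1 || numSegments <= factor || factor == 1)
    {
        return factor;
    }
    // Choose a first-pass factor so that every later pass merges exactly 'factor' segments.
    int mod = (numSegments - 1) % (factor - 1);
    if (mod == 0)
    {
        return factor;
    }
    return mod + 1;
}

For example, with 30 on-disk segments and factor = 10, the first pass merges only 3 segments (29 % 9 = 2, so 2 + 1), leaving 28; the next two passes merge 10 each (28, then 19, then 10), and the remaining 10 segments satisfy numSegments <= factor, so they feed the final returned iterator.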
/// <exception cref="System.IO.IOException"/>
public Writer(Configuration conf, FSDataOutputStream @out, Type keyClass, Type valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    : this(conf, @out, keyClass, valueClass, codec, writesCounter, false)
{
}
/// <exception cref="System.IO.IOException"/>
public static RawKeyValueIterator Merge<K, V>(Configuration conf, FileSystem fs,
    IList<Merger.Segment<K, V>> segments, int mergeFactor, Path tmpDir,
    RawComparator<K> comparator, Progressable reporter, Counters.Counter readsCounter,
    Counters.Counter writesCounter, Progress mergePhase)
{
    System.Type keyClass = typeof(K);
    System.Type valueClass = typeof(V);
    return Merge(conf, fs, keyClass, valueClass, segments, mergeFactor, tmpDir, comparator,
        reporter, false, readsCounter, writesCounter, mergePhase);
}
/// <exception cref="System.IO.IOException"/>
public MergeQueue(Configuration conf, FileSystem fs, Path[] inputs, bool deleteInputs,
    CompressionCodec codec, RawComparator<K> comparator, Progressable reporter,
    Counters.Counter mergedMapOutputsCounter, TaskType taskType)
{
    this.conf = conf;
    this.fs = fs;
    this.codec = codec;
    this.comparator = comparator;
    this.reporter = reporter;
    if (taskType == TaskType.Map)
    {
        ConsiderFinalMergeForProgress();
    }
    foreach (Path file in inputs)
    {
        Log.Debug("MergeQ: adding: " + file);
        // Already-merged outputs are not counted again against the merged-map-outputs counter.
        segments.AddItem(new Merger.Segment<K, V>(conf, fs, file, codec, !deleteInputs,
            (file.ToString().EndsWith(Task.MergedOutputPrefix) ? null : mergedMapOutputsCounter)));
    }
    // Sort segments on file lengths.
    segments.Sort(segmentComparator);
}
protected internal Writer(Counters.Counter writesCounter)
{
    // Count records written to disk
    writtenRecordsCounter = writesCounter;
}
/// <summary>Construct an IFile Reader.</summary>
/// <param name="conf">Configuration File</param>
/// <param name="fs">FileSystem</param>
/// <param name="file">
/// Path of the file to be opened. This file should have
/// checksum bytes for the data at the end of the file.
/// </param>
/// <param name="codec">codec</param>
/// <param name="readsCounter">Counter for records read from disk</param>
/// <exception cref="System.IO.IOException"/>
public Reader(Configuration conf, FileSystem fs, Path file, CompressionCodec codec,
    Counters.Counter readsCounter)
    : this(conf, fs.Open(file), fs.GetFileStatus(file).GetLen(), codec, readsCounter)
{
}
public virtual bool ContentEquals(Counters.Counter counter)
{
    return realCounter.Equals(counter.GetUnderlyingCounter());
}
public override bool ContentEquals(Counters.Counter counter)
{
    // TODO Auto-generated method stub
    return false;
}