Exemplo n.º 1
0
 /// <summary>
 /// Adds <paramref name="mapId"/> back to the known maps of
 /// <paramref name="host"/>.
 /// </summary>
 /// <param name="host">Host whose known-map list receives the id.</param>
 /// <param name="mapId">Map attempt to hand back to the host.</param>
 public virtual void PutBackKnownMapOutput(MapHost host, TaskAttemptID mapId)
 {
     // The host is only mutated while holding the monitor of this instance.
     lock (this)
     {
         host.AddKnownMap(mapId);
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Endless loop that takes entries from <c>penalties</c> and marks the
 /// corresponding host available again.
 /// </summary>
 /// <remarks>
 /// When a host becomes pending after being marked available it is queued in
 /// <c>pendingHosts</c> and all threads waiting on the outer object are woken.
 /// An interrupt ends the loop quietly; every other exception is passed to
 /// the reporter.
 /// </remarks>
 public override void Run()
 {
     try
     {
         while (true)
         {
             // take the first host that has an expired penalty
             MapHost host = this._enclosing.penalties.Take().host;
             lock (this._enclosing._enclosing)
             {
                 if (host.MarkAvailable() == MapHost.State.Pending)
                 {
                     this._enclosing.pendingHosts.AddItem(host);
                     Sharpen.Runtime.NotifyAll(this._enclosing._enclosing);
                 }
             }
         }
     }
     catch (System.Threading.ThreadInterruptedException)
     {
         // Being interrupted is the regular way to stop this thread.
         // NOTE(review): assumes the blocking Take() surfaces interruption as
         // ThreadInterruptedException - confirm against the queue implementation.
         return;
     }
     catch (Exception t)
     {
         // Previously unreachable: an identical catch (Exception) clause
         // preceded it, so real failures were silently dropped.
         this._enclosing.reporter.ReportException(t);
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Hands every map in <paramref name="remaining"/> back to the scheduler
 /// for <paramref name="host"/> and then closes the connection.
 /// </summary>
 /// <param name="host">Host the maps are returned to.</param>
 /// <param name="remaining">Map attempts that were not fetched.</param>
 private void AbortConnect(MapHost host, ICollection <TaskAttemptID> remaining)
 {
     // Return the outstanding work first, close the connection afterwards.
     foreach (TaskAttemptID unfetched in remaining)
     {
         scheduler.PutBackKnownMapOutput(host, unfetched);
     }

     CloseConnection();
 }
Exemplo n.º 4
0
        /// <summary>
        /// Drives a scheduler through host-failed, copy-succeeded, copy-failed
        /// for the same host and expects the final call not to throw
        /// (MAPREDUCE-6361).
        /// </summary>
        public virtual void TestSucceedAndFailedCopyMap <K, V>()
        {
            JobConf job = new JobConf();
            job.SetNumMapTasks(2);

            // Mocked collaborators needed to build the shuffle context.
            TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
            Reporter mockReporter = Org.Mockito.Mockito.Mock<Reporter>();
            FileSystem mockFileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
            Type combinerClass = job.GetCombinerClass();

            // needed for mock with generic
            Task.CombineOutputCollector<K, V> mockCombineOutputCollector =
                (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
            TaskAttemptID mockTaskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
            LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
            CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock<CompressionCodec>();

            Counters.Counter mockCounter = Org.Mockito.Mockito.Mock<Counters.Counter>();
            TaskStatus mockTaskStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
            Progress mockProgress = Org.Mockito.Mockito.Mock<Progress>();
            MapOutputFile mockMapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();

            Org.Apache.Hadoop.Mapred.Task mockTask =
                Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
            MapOutput<K, V> output = Org.Mockito.Mockito.Mock<MapOutput>();

            ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
                mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
                mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
                mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
                mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);

            // Scheduler under test, wired with real status/progress objects.
            TaskStatus status = new _TaskStatus_251();
            Progress progress = new Progress();
            ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(
                job, status, null, null, progress,
                context.GetShuffledMapsCounter(),
                context.GetReduceShuffleBytes(),
                context.GetFailedShuffleCounter());

            MapHost host1 = new MapHost("host1", null);
            TaskID failedTaskId = new TaskID(new JobID("test", 0), TaskType.Map, 0);
            TaskAttemptID failedAttemptID = new TaskAttemptID(failedTaskId, 0);
            TaskID succeedTaskId = new TaskID(new JobID("test", 0), TaskType.Map, 1);
            TaskAttemptID succeedAttemptID = new TaskAttemptID(succeedTaskId, 1);

            // handle output fetch failure for failedAttemptID, part I
            scheduler.HostFailed(host1.GetHostName());

            // handle output fetch succeed for succeedAttemptID
            long bytes = 500L * 1024 * 1024;
            scheduler.CopySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

            // handle output fetch failure for failedAttemptID, part II
            // for MAPREDUCE-6361: verify no NPE exception get thrown out
            scheduler.CopyFailed(failedAttemptID, host1, true, false);
        }
Exemplo n.º 5
0
 /// <summary>
 /// Records a failed copy of <paramref name="mapId"/> from
 /// <paramref name="host"/>: penalizes the host, updates the per-map failure
 /// count, reports the failure and schedules the host's penalty.
 /// </summary>
 /// <param name="mapId">Map attempt whose output could not be copied.</param>
 /// <param name="host">Host the copy was attempted from.</param>
 /// <param name="readError">Forwarded unchanged to <c>CheckAndInformMRAppMaster</c>.</param>
 /// <param name="connectExcpt">Forwarded unchanged to <c>CheckAndInformMRAppMaster</c>.</param>
 public virtual void CopyFailed(TaskAttemptID mapId, MapHost host, bool readError,
                                bool connectExcpt)
 {
     lock (this)
     {
         host.Penalize();
         int failures = 1;
         if (failureCounts.Contains(mapId))
         {
             IntWritable x = failureCounts[mapId];
             x.Set(x.Get() + 1);
             failures = x.Get();
         }
         else
         {
             failureCounts[mapId] = new IntWritable(1);
         }
         string      hostname      = host.GetHostName();
         IntWritable hostFailedNum = hostFailures[hostname];
         // MAPREDUCE-6361: hostname could get cleanup from hostFailures in another
         // thread with copySucceeded.
         // In this case, add back hostname to hostFailures to get rid of NPE issue.
         if (hostFailedNum == null)
         {
             hostFailedNum = new IntWritable(1);
             hostFailures[hostname] = hostFailedNum;
         }
         //report failure if already retried maxHostFailures times
         bool hostFail = hostFailedNum.Get() > GetMaxHostFailures();
         if (failures >= abortFailureLimit)
         {
             // Thrown and caught on purpose so the reported exception carries
             // a stack trace.
             try
             {
                 throw new IOException(failures + " failures downloading " + mapId);
             }
             catch (IOException ie)
             {
                 reporter.ReportException(ie);
             }
         }
         CheckAndInformMRAppMaster(failures, mapId, readError, connectExcpt, hostFail);
         CheckReducerHealth();
         // Cap the back-off while it is still a double. Converting an
         // out-of-range double to long yields an unspecified value in C#
         // (it does not saturate), which could otherwise produce a negative
         // delay that slips past the maxDelay limit.
         double rawDelay = InitialPenalty * Math.Pow(PenaltyGrowthRate, failures);
         long   delay    = rawDelay >= maxDelay ? maxDelay : (long)rawDelay;
         penalties.AddItem(new ShuffleSchedulerImpl.Penalty(host, delay));
         failedShuffleCounter.Increment(1);
     }
 }
Exemplo n.º 6
0
 /// <summary>
 /// Releases <paramref name="host"/> after a fetch attempt and logs how long
 /// it was held.
 /// </summary>
 /// <remarks>
 /// A host that is not penalized is marked available; if that leaves it in
 /// the pending state it is queued again and waiting threads are notified.
 /// </remarks>
 /// <param name="host">Host being released.</param>
 public virtual void FreeHost(MapHost host)
 {
     lock (this)
     {
         // Short-circuit keeps MarkAvailable() from running on penalized hosts.
         bool notPenalized = host.GetState() != MapHost.State.Penalized;
         if (notPenalized && host.MarkAvailable() == MapHost.State.Pending)
         {
             pendingHosts.AddItem(host);
             Sharpen.Runtime.NotifyAll(this);
         }

         string threadName = Sharpen.Thread.CurrentThread().GetName();
         long   heldFor    = Time.MonotonicNow() - shuffleStart.Get();
         Log.Info(host + " freed by " + threadName + " in " + heldFor + "ms");
     }
 }
Exemplo n.º 7
0
        /// <summary>Create the map-output-url.</summary>
        /// <remarks>
        /// The URL is the host's base URL followed by all the map ids,
        /// separated by commas.
        /// </remarks>
        /// <param name="host">Host whose <c>GetBaseUrl()</c> value starts the URL.</param>
        /// <param name="maps">Map attempts whose ids are appended in enumeration order.</param>
        /// <returns>The assembled URL.</returns>
        /// <exception cref="System.UriFormatException"/>
        private Uri GetMapOutputURL(MapHost host, ICollection <TaskAttemptID> maps)
        {
            // Get the base url
            StringBuilder url       = new StringBuilder(host.GetBaseUrl());
            string        separator = "";

            foreach (TaskAttemptID mapId in maps)
            {
                // Empty before the first id, a comma before every later one.
                url.Append(separator);
                url.Append(mapId);
                separator = ",";
            }
            string mapOutputUrl = url.ToString();
            // Only build the log message when debug logging is active.
            if (Log.IsDebugEnabled())
            {
                Log.Debug("MapOutput URL for " + host + " -> " + mapOutputUrl);
            }
            return(new Uri(mapOutputUrl));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Copies <paramref name="compressedLength"/> bytes of map output from
        /// <paramref name="input"/> to <c>disk</c>, reading through a
        /// checksum-verifying stream.
        /// </summary>
        /// <param name="host">Source host; used only in the incomplete-output message.</param>
        /// <param name="input">Raw stream positioned at the map output data.</param>
        /// <param name="compressedLength">Number of bytes to copy.</param>
        /// <param name="decompressedLength">Not used by this implementation.</param>
        /// <param name="metrics">Receives the byte count of every chunk read.</param>
        /// <param name="reporter">Signalled after every chunk.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Shuffle(MapHost host, InputStream input, long compressedLength
                                     , long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
        {
            IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, conf);
            input = checksumIn;

            // Copy data to local-disk
            long remainingBytes = compressedLength;
            try
            {
                const int chunkSize = 64 * 1024;
                byte[]    chunk     = new byte[chunkSize];

                while (remainingBytes > 0)
                {
                    int wanted = (int)Math.Min(remainingBytes, chunkSize);
                    int got    = checksumIn.ReadWithChecksum(chunk, 0, wanted);
                    if (got < 0)
                    {
                        throw new IOException("read past end of stream reading " + GetMapId());
                    }
                    disk.Write(chunk, 0, got);
                    remainingBytes -= got;
                    metrics.InputBytes(got);
                    reporter.Progress();
                }

                long copied = compressedLength - remainingBytes;
                Log.Info("Read " + copied + " bytes from map-output for " + GetMapId());
                disk.Close();
            }
            catch (IOException)
            {
                // Release both streams, then let the caller see the failure.
                IOUtils.Cleanup(Log, input, disk);
                throw;
            }

            // Sanity check
            if (remainingBytes != 0)
            {
                throw new IOException("Incomplete map output received for " + GetMapId() + " from "
                                      + host.GetHostName() + " (" + remainingBytes + " bytes missing of " + compressedLength
                                      + ")");
            }
            this.compressedSize = compressedLength;
        }
Exemplo n.º 9
0
 /// <summary>
 /// Registers <paramref name="mapId"/> as available on the host named
 /// <paramref name="hostName"/>, creating the host entry when none exists.
 /// </summary>
 /// <param name="hostName">Key used to look the host up in <c>mapLocations</c>.</param>
 /// <param name="hostUrl">URL given to a newly created host entry.</param>
 /// <param name="mapId">Map attempt added to the host.</param>
 public virtual void AddKnownMapOutput(string hostName, string hostUrl, TaskAttemptID
                                       mapId)
 {
     lock (this)
     {
         MapHost target = mapLocations[hostName];
         if (target == null)
         {
             target = new MapHost(hostName, hostUrl);
             mapLocations[hostName] = target;
         }
         target.AddKnownMap(mapId);

         // Only a host that is now pending needs to be queued and announced.
         if (target.GetState() != MapHost.State.Pending)
         {
             return;
         }
         pendingHosts.AddItem(target);
         Sharpen.Runtime.NotifyAll(this);
     }
 }
Exemplo n.º 10
0
        /// <summary>
        /// Opens the connection (with retry), attaches the URL hash, connects
        /// and verifies the connection, bailing out whenever the fetcher has
        /// been stopped in between.
        /// </summary>
        /// <param name="host">Passed through to <c>OpenConnectionWithRetry</c>.</param>
        /// <param name="remaining">Passed through to <c>OpenConnectionWithRetry</c>.</param>
        /// <param name="url">URL to connect to and to derive the hash from.</param>
        /// <exception cref="System.IO.IOException"/>
        private void SetupConnectionsWithRetry(MapHost host, ICollection <TaskAttemptID> remaining
                                               , Uri url)
        {
            OpenConnectionWithRetry(host, remaining, url);
            if (stopped)
            {
                return;
            }

            // generate hash of the url
            string urlMessage = SecureShuffleUtils.BuildMsgFrom(url);
            string urlHash    = SecureShuffleUtils.HashFromString(urlMessage, shuffleSecretKey);
            SetupShuffleConnection(urlHash);
            Connect(connection, connectionTimeout);

            // Skip verification when the thread was stopped during connect.
            if (!stopped)
            {
                VerifyConnection(url, urlMessage, urlHash);
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Calls <c>OpenConnection</c> until it succeeds, sleeping
        /// <c>fetchRetryInterval</c> between attempts.
        /// </summary>
        /// <remarks>
        /// The <see cref="System.IO.IOException"/> of a failed attempt is
        /// rethrown immediately when retry is disabled, or once
        /// <c>fetchRetryTimeout</c> has elapsed since the first attempt.
        /// The method returns without a connection if the sleep is cut short
        /// while the fetcher is stopped.
        /// </remarks>
        /// <param name="host">Not used by this method.</param>
        /// <param name="remaining">Not used by this method.</param>
        /// <param name="url">URL handed to <c>OpenConnection</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        private void OpenConnectionWithRetry(MapHost host, ICollection <TaskAttemptID> remaining
                                             , Uri url)
        {
            long startTime = Time.MonotonicNow();

            while (true)
            {
                try
                {
                    OpenConnection(url);
                    return;
                }
                catch (IOException)
                {
                    if (!fetchRetryEnabled)
                    {
                        // throw exception directly if fetch's retry is not enabled
                        throw;
                    }
                    if ((Time.MonotonicNow() - startTime) >= this.fetchRetryTimeout)
                    {
                        // Message previously lacked the space before "after".
                        Log.Warn("Failed to connect to host: " + url + " after " + fetchRetryTimeout + " milliseconds."
                                 );
                        throw;
                    }
                    try
                    {
                        Sharpen.Thread.Sleep(this.fetchRetryInterval);
                    }
                    catch (Exception)
                    {
                        // A sleep cut short ends the retry loop only when the
                        // fetcher was stopped; otherwise the next attempt runs.
                        if (stopped)
                        {
                            return;
                        }
                    }
                }
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Check whether the retry timeout has been hit; if not, throw the
        /// given exception to start a new round of retry.
        /// </summary>
        /// <remarks>
        /// The first call after <c>retryStartTime</c> was reset to 0 starts the
        /// retry clock. Once <c>fetchRetryTimeout</c> has elapsed the method
        /// only logs and returns, so the caller continues with its failure
        /// handling.
        /// </remarks>
        /// <param name="host">Host named in the log messages.</param>
        /// <param name="ioe">Failure that triggered the check; thrown while retrying.</param>
        /// <exception cref="System.IO.IOException"/>
        private void CheckTimeoutOrRetry(MapHost host, IOException ioe)
        {
            // First time to retry.
            long currentTime = Time.MonotonicNow();

            if (retryStartTime == 0)
            {
                retryStartTime = currentTime;
            }
            // Retry is not timeout, let's do retry with throwing an exception.
            if (currentTime - retryStartTime < this.fetchRetryTimeout)
            {
                Log.Warn("Shuffle output from " + host.GetHostName() + " failed, retry it.", ioe);
                throw ioe;
            }
            // timeout, prepare to be failed.
            // Message previously lacked the space before "after".
            Log.Warn("Timeout for copying MapOutput with retry on host " + host + " after " +
                     fetchRetryTimeout + " milliseconds.");
        }
Exemplo n.º 13
0
        /// <summary>
        /// Sets up the connection to <paramref name="url"/> and returns a data
        /// stream over its input.
        /// </summary>
        /// <param name="host">Host being fetched from.</param>
        /// <param name="remaining">Map attempts still expected from the host.</param>
        /// <param name="url">Shuffle URL to open.</param>
        /// <returns>
        /// The input stream, or <c>null</c> when the fetcher was stopped or the
        /// connection failed with an <c>IOException</c>.
        /// </returns>
        private DataInputStream OpenShuffleUrl(MapHost host, ICollection <TaskAttemptID> remaining
                                               , Uri url)
        {
            try
            {
                SetupConnectionsWithRetry(host, remaining, url);
                if (!stopped)
                {
                    return(new DataInputStream(connection.GetInputStream()));
                }
                AbortConnect(host, remaining);
            }
            catch (IOException ie)
            {
                bool connectExcpt = ie is ConnectException;
                ioErrs.Increment(1);
                Log.Warn("Failed to connect to " + host + " with " + remaining.Count + " map outputs"
                         , ie);

                // If connect did not succeed, just mark all the maps as failed,
                // indirectly penalizing the host
                scheduler.HostFailed(host.GetHostName());
                foreach (TaskAttemptID failedMap in remaining)
                {
                    scheduler.CopyFailed(failedMap, host, false, connectExcpt);
                }

                // Add back all the remaining maps, WITHOUT marking them as failed
                foreach (TaskAttemptID returnedMap in remaining)
                {
                    scheduler.PutBackKnownMapOutput(host, returnedMap);
                }
            }
            return(null);
        }
Exemplo n.º 14
0
 /// <summary>
 /// Waits until at least one host is pending, then removes a randomly
 /// chosen one from <c>pendingHosts</c>, marks it busy and returns it.
 /// </summary>
 /// <returns>The host assigned to the calling thread.</returns>
 /// <exception cref="System.Exception"/>
 public virtual MapHost GetHost()
 {
     lock (this)
     {
         while (pendingHosts.IsEmpty())
         {
             Sharpen.Runtime.Wait(this);
         }
         // Walk to the randomly chosen position with foreach; IEnumerator<T>
         // offers MoveNext()/Current, not the Next() the old code called.
         int     numToPick = random.Next(pendingHosts.Count);
         MapHost host      = null;
         int     position  = 0;
         foreach (MapHost candidate in pendingHosts)
         {
             if (position == numToPick)
             {
                 host = candidate;
                 break;
             }
             position++;
         }
         // Removal happens after the enumeration has ended.
         pendingHosts.Remove(host);
         host.MarkBusy();
         Log.Info("Assigning " + host + " with " + host.GetNumKnownMapOutputs() + " to " +
                  Sharpen.Thread.CurrentThread().GetName());
         shuffleStart.Set(Time.MonotonicNow());
         return(host);
     }
 }
Exemplo n.º 15
0
 /// <summary>
 /// Records a successful copy of <paramref name="mapId"/>: clears the
 /// failure bookkeeping for the map and the host and, the first time the
 /// map finishes, commits the output and updates counters and status.
 /// </summary>
 /// <param name="mapId">Map attempt that was copied.</param>
 /// <param name="host">Host it was copied from.</param>
 /// <param name="bytes">Byte count added to the shuffle totals.</param>
 /// <param name="startMillis">Start of the copy.</param>
 /// <param name="endMillis">End of the copy.</param>
 /// <param name="output">Map output committed on first completion.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void CopySucceeded(TaskAttemptID mapId, MapHost host, long bytes,
                                   long startMillis, long endMillis, MapOutput <K, V> output)
 {
     lock (this)
     {
         Sharpen.Collections.Remove(failureCounts, mapId);
         Sharpen.Collections.Remove(hostFailures, host.GetHostName());

         int mapIndex = mapId.GetTaskID().GetId();
         if (finishedMaps[mapIndex])
         {
             // Already accounted for; nothing more to do.
             return;
         }

         output.Commit();
         finishedMaps[mapIndex] = true;
         shuffledMapsCounter.Increment(1);
         remainingMaps--;
         if (remainingMaps == 0)
         {
             Sharpen.Runtime.NotifyAll(this);
         }

         // update single copy task status
         long elapsed    = endMillis - startMillis;
         long copyMillis = elapsed == 0 ? 1 : elapsed;
         float  bytesPerMillis     = (float)bytes / copyMillis;
         float  transferRate       = bytesPerMillis * BytesPerMillisToMbs;
         string individualProgress = "copy task(" + mapId + " succeeded" + " at " + mbpsFormat
                                     .Format(transferRate) + " MB/s)";

         // update the aggregated status
         copyTimeTracker.Add(startMillis, endMillis);
         totalBytesShuffledTillNow += bytes;
         UpdateStatus(individualProgress);
         reduceShuffleBytes.Increment(bytes);
         lastProgressTime = Time.MonotonicNow();
         Log.Debug("map " + mapId + " done " + status.GetStateString());
     }
 }
Exemplo n.º 16
0
 /// <summary>
 /// Fetch loop: until stopped or interrupted, waits for merge resources,
 /// obtains a host from the scheduler and copies its map outputs.
 /// </summary>
 /// <remarks>
 /// A host obtained from the scheduler is always freed again, even when the
 /// copy throws. An interrupt ends the loop quietly; every other exception
 /// is passed to the exception reporter.
 /// </remarks>
 public override void Run()
 {
     try
     {
         while (!stopped && !Sharpen.Thread.CurrentThread().IsInterrupted())
         {
             MapHost host = null;
             try
             {
                 // If merge is on, block
                 merger.WaitForResource();
                 // Get a host to shuffle from
                 host = scheduler.GetHost();
                 metrics.ThreadBusy();
                 // Shuffle
                 CopyFromHost(host);
             }
             finally
             {
                 if (host != null)
                 {
                     scheduler.FreeHost(host);
                     metrics.ThreadFree();
                 }
             }
         }
     }
     catch (System.Threading.ThreadInterruptedException)
     {
         // Being interrupted is the regular way to stop this thread.
         // NOTE(review): assumes the blocking calls above surface interruption
         // as ThreadInterruptedException - confirm against their implementation.
         return;
     }
     catch (Exception t)
     {
         // Previously unreachable: an identical catch (Exception) clause
         // preceded it, so real failures were silently dropped.
         exceptionReporter.ReportException(t);
     }
 }
Exemplo n.º 17
0
        /// <summary>
        /// Fills the <c>memory</c> buffer from <paramref name="input"/>,
        /// reading through a checksum stream and, when a codec is configured,
        /// through that codec's stream as well.
        /// </summary>
        /// <param name="host">Not used by this implementation.</param>
        /// <param name="input">Raw stream positioned at the map output data.</param>
        /// <param name="compressedLength">Length handed to the checksum stream.</param>
        /// <param name="decompressedLength">Not used by this implementation.</param>
        /// <param name="metrics">Receives the number of bytes read.</param>
        /// <param name="reporter">Signalled once the buffer is filled.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Shuffle(MapHost host, InputStream input, long compressedLength
                                     , long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
        {
            input = new IFileInputStream(input, compressedLength, conf);

            // Are map-outputs compressed?
            if (codec != null)
            {
                decompressor.Reset();
                input = codec.CreateInputStream(input, decompressor);
            }

            try
            {
                int expected = memory.Length;
                IOUtils.ReadFully(input, memory, 0, expected);
                metrics.InputBytes(expected);
                reporter.Progress();
                Log.Info("Read " + expected + " bytes from map-output for " + GetMapId());

                // Anything left on the stream means more data than announced.
                bool hasExtraBytes = input.Read() >= 0;
                if (hasExtraBytes)
                {
                    throw new IOException("Unexpected extra bytes from input stream for " + GetMapId(
                                              ));
                }
            }
            catch (IOException)
            {
                // Close the stream, then let the caller see the failure.
                IOUtils.Cleanup(Log, input);
                throw;
            }
            finally
            {
                CodecPool.ReturnDecompressor(decompressor);
            }
        }
Exemplo n.º 18
0
 /// <summary>
 /// Takes all known maps from <paramref name="host"/> and returns those
 /// still needed, up to <c>MaxMapsAtOnce</c>; needed maps beyond the limit
 /// are handed back to the host.
 /// </summary>
 /// <remarks>
 /// A map is needed when it is neither in <c>obsoleteMaps</c> nor flagged in
 /// <c>finishedMaps</c>. Maps that are not needed are dropped.
 /// </remarks>
 /// <param name="host">Host whose known maps are claimed.</param>
 /// <returns>The maps assigned to the caller, in the host's order.</returns>
 public virtual IList <TaskAttemptID> GetMapsForHost(MapHost host)
 {
     lock (this)
     {
         IList <TaskAttemptID> list   = host.GetAndClearKnownMaps();
         IList <TaskAttemptID> result = new AList <TaskAttemptID>();
         int  includedMaps = 0;
         int  totalSize    = list.Count;
         bool limitReached = false;
         // One foreach replaces the two HasNext()/Next() loops: IEnumerator<T>
         // offers MoveNext()/Current, not those members.
         foreach (TaskAttemptID id in list)
         {
             if (obsoleteMaps.Contains(id) || finishedMaps[id.GetTaskID().GetId()])
             {
                 continue;
             }
             if (limitReached)
             {
                 // put back the maps left after the limit
                 host.AddKnownMap(id);
             }
             else
             {
                 // find the maps that we still need, up to the limit
                 result.AddItem(id);
                 if (++includedMaps >= MaxMapsAtOnce)
                 {
                     limitReached = true;
                 }
             }
         }
         Log.Info("assigned " + includedMaps + " of " + totalSize + " to " + host + " to "
                  + Sharpen.Thread.CurrentThread().GetName());
         return(result);
     }
 }
Exemplo n.º 19
0
        /// <summary>
        /// Reads one map output (header plus data) from
        /// <paramref name="input"/> and hands it to the merger and scheduler.
        /// </summary>
        /// <param name="host">Host the stream comes from.</param>
        /// <param name="input">Stream positioned at a shuffle header.</param>
        /// <param name="remaining">
        /// Map attempts still expected; the copied attempt is removed on success.
        /// </param>
        /// <param name="canRetry">
        /// When true, an <c>IOException</c> is first passed to
        /// <c>CheckTimeoutOrRetry</c>, which may rethrow it.
        /// </param>
        /// <returns>
        /// <c>null</c> on success; <c>EmptyAttemptIdArray</c> when the merger
        /// could not reserve space or asked to wait; otherwise the attempts
        /// considered failed (all of <paramref name="remaining"/> when the map
        /// id is unknown).
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private TaskAttemptID[] CopyMapOutput(MapHost host, DataInputStream input, ICollection
                                              <TaskAttemptID> remaining, bool canRetry)
        {
            // Kept outside the try so the catch block can see how far we got.
            MapOutput <K, V> mapOutput          = null;
            TaskAttemptID    mapId              = null;
            long             decompressedLength = -1;
            long             compressedLength   = -1;

            try
            {
                long startTime = Time.MonotonicNow();
                int  forReduce = -1;
                //Read the shuffle header
                try
                {
                    ShuffleHeader header = new ShuffleHeader();
                    header.ReadFields(input);
                    mapId              = TaskAttemptID.ForName(header.mapId);
                    compressedLength   = header.compressedLength;
                    decompressedLength = header.uncompressedLength;
                    forReduce          = header.forReduce;
                }
                catch (ArgumentException e)
                {
                    badIdErrs.Increment(1);
                    Log.Warn("Invalid map id ", e);
                    //Don't know which one was bad, so consider all of them as bad
                    return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                }
                InputStream @is = input;
                @is = CryptoUtils.WrapIfNecessary(jobConf, @is, compressedLength);
                // Both lengths are reduced by the crypto padding before use.
                compressedLength   -= CryptoUtils.CryptoPadding(jobConf);
                decompressedLength -= CryptoUtils.CryptoPadding(jobConf);
                // Do some basic sanity verification
                if (!VerifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId
                                  ))
                {
                    return(new TaskAttemptID[] { mapId });
                }
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("header: " + mapId + ", len: " + compressedLength + ", decomp len: " +
                              decompressedLength);
                }
                // Get the location for the map output - either in-memory or on-disk
                try
                {
                    mapOutput = merger.Reserve(mapId, decompressedLength, id);
                }
                catch (IOException ioe)
                {
                    // kill this reduce attempt
                    ioErrs.Increment(1);
                    scheduler.ReportLocalError(ioe);
                    return(EmptyAttemptIdArray);
                }
                // Check if we can shuffle *now* ...
                if (mapOutput == null)
                {
                    Log.Info("fetcher#" + id + " - MergeManager returned status WAIT ...");
                    //Not an error but wait to process data.
                    return(EmptyAttemptIdArray);
                }
                // The codecs for lzo, lz4, snappy, bz2, etc. throw java.lang.InternalError
                // on decompression failures. Catching and re-throwing as IOException
                // to allow fetch failure logic to be processed
                try
                {
                    // Go!
                    Log.Info("fetcher#" + id + " about to shuffle output of map " + mapOutput.GetMapId
                                 () + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput
                             .GetDescription());
                    mapOutput.Shuffle(host, @is, compressedLength, decompressedLength, metrics, reporter
                                      );
                }
                catch (InternalError e)
                {
                    Log.Warn("Failed to shuffle for fetcher#" + id, e);
                    throw new IOException(e);
                }
                // Inform the shuffle scheduler
                long endTime = Time.MonotonicNow();
                // Reset retryStartTime as map task make progress if retried before.
                retryStartTime = 0;
                scheduler.CopySucceeded(mapId, host, compressedLength, startTime, endTime, mapOutput
                                        );
                // Note successful shuffle
                remaining.Remove(mapId);
                metrics.SuccessFetch();
                return(null);
            }
            catch (IOException ioe)
            {
                // Discard a partially written output before deciding how to fail.
                if (mapOutput != null)
                {
                    mapOutput.Abort();
                }
                // May rethrow ioe to the caller while the retry window is open.
                if (canRetry)
                {
                    CheckTimeoutOrRetry(host, ioe);
                }
                ioErrs.Increment(1);
                // Failure before an output was reserved.
                if (mapId == null || mapOutput == null)
                {
                    Log.Warn("fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength
                             + ", " + compressedLength, ioe);
                    if (mapId == null)
                    {
                        // Unknown map id: report every remaining attempt as failed.
                        return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                    }
                    else
                    {
                        return(new TaskAttemptID[] { mapId });
                    }
                }
                Log.Warn("Failed to shuffle output of " + mapId + " from " + host.GetHostName(),
                         ioe);
                // Inform the shuffle-scheduler
                metrics.FailedFetch();
                return(new TaskAttemptID[] { mapId });
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Fetches the map outputs the scheduler assigns for
        /// <paramref name="host"/>, one after another over a single connection.
        /// </summary>
        /// <remarks>
        /// When <c>CopyMapOutput</c> throws an <c>IOException</c> the
        /// connection is re-opened for the maps still remaining. Maps that
        /// were not fetched are always handed back to the scheduler.
        /// </remarks>
        /// <param name="host">Host to fetch from.</param>
        protected internal virtual void CopyFromHost(MapHost host)
        {
            // reset retryStartTime for a new host
            retryStartTime = 0;
            // Get completed maps on 'host'
            IList <TaskAttemptID> maps = scheduler.GetMapsForHost(host);

            // Sanity check to catch hosts with only 'OBSOLETE' maps,
            // especially at the tail of large jobs
            if (maps.Count == 0)
            {
                return;
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Fetcher " + id + " going to fetch from " + host + " for: " + maps);
            }
            // List of maps to be fetched yet
            ICollection <TaskAttemptID> remaining = new HashSet <TaskAttemptID>(maps);
            // Construct the url and connect
            Uri             url   = GetMapOutputURL(host, maps);
            DataInputStream input = OpenShuffleUrl(host, remaining, url);

            // A null stream means OpenShuffleUrl already dealt with the failure or stop.
            if (input == null)
            {
                return;
            }
            try
            {
                // Loop through available map-outputs and fetch them.
                // On any error, failedTasks is not null and we exit
                // after putting back the remaining maps to the
                // yet_to_be_fetched list and marking the failed tasks.
                TaskAttemptID[] failedTasks = null;
                while (!remaining.IsEmpty() && failedTasks == null)
                {
                    try
                    {
                        failedTasks = CopyMapOutput(host, input, remaining, fetchRetryEnabled);
                    }
                    catch (IOException)
                    {
                        // CopyMapOutput asked for a retry.
                        // Setup connection again if disconnected by NM
                        connection.Disconnect();
                        // Get map output from remaining tasks only.
                        url   = GetMapOutputURL(host, remaining);
                        input = OpenShuffleUrl(host, remaining, url);
                        if (input == null)
                        {
                            return;
                        }
                    }
                }
                // An empty (non-null) array ends the loop without reporting failures.
                if (failedTasks != null && failedTasks.Length > 0)
                {
                    Log.Warn("copyMapOutput failed for tasks " + Arrays.ToString(failedTasks));
                    scheduler.HostFailed(host.GetHostName());
                    foreach (TaskAttemptID left in failedTasks)
                    {
                        scheduler.CopyFailed(left, host, true, false);
                    }
                }
                // Sanity check
                if (failedTasks == null && !remaining.IsEmpty())
                {
                    throw new IOException("server didn't return all expected map outputs: " + remaining
                                          .Count + " left.");
                }
                input.Close();
                input = null;
            }
            finally
            {
                // Only reached with a non-null stream when the try block exited early.
                if (input != null)
                {
                    IOUtils.Cleanup(Log, input);
                    input = null;
                }
                // Whatever was not fetched goes back to the scheduler.
                foreach (TaskAttemptID left in remaining)
                {
                    scheduler.PutBackKnownMapOutput(host, left);
                }
            }
        }
Exemplo n.º 21
0
 /// <summary>
 /// Reads a map output from <paramref name="input"/>; the destination is
 /// defined by the implementing class.
 /// </summary>
 /// <param name="host">Host the data is fetched from.</param>
 /// <param name="input">Stream supplying the map output data.</param>
 /// <param name="compressedLength">Length of the data as transferred (presumably in bytes - confirm).</param>
 /// <param name="decompressedLength">Length of the data after decompression (presumably in bytes - confirm).</param>
 /// <param name="metrics">Shuffle client metrics available to the implementation.</param>
 /// <param name="reporter">Reporter available to the implementation.</param>
 /// <exception cref="System.IO.IOException"/>
 public abstract void Shuffle(MapHost host, InputStream input, long compressedLength
                              , long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter);
Exemplo n.º 22
0
        /// <summary>
        /// Shuffles a valid IFile payload to disk, then a copy with one byte
        /// zeroed, and expects the second shuffle to raise a
        /// <c>ChecksumException</c>; finally re-reads the file on disk.
        /// </summary>
        public virtual void TestCorruptedIFile()
        {
            int  fetcher             = 7;
            Path onDiskMapOutputPath = new Path(name.GetMethodName() + "/foo");
            Path shuffledToDisk      = OnDiskMapOutput.GetTempPath(onDiskMapOutputPath, fetcher);

            fs = FileSystem.GetLocal(job).GetRaw();
            MapOutputFile mapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
            OnDiskMapOutput<Text, Text> diskOutput = new OnDiskMapOutput<Text, Text>(
                map1ID, id, mm, 100L, job, mapOutputFile, fetcher, true, fs, onDiskMapOutputPath);

            // Serialize a shuffle header followed by checksummed map data.
            string                mapData    = "MAPDATA12345678901234567890";
            ShuffleHeader         header     = new ShuffleHeader(map1ID.ToString(), 14, 10, 1);
            ByteArrayOutputStream serialized = new ByteArrayOutputStream();
            DataOutputStream      headerOut  = new DataOutputStream(serialized);
            IFileOutputStream     payloadOut = new IFileOutputStream(headerOut);

            header.Write(headerOut);
            int headerSize = headerOut.Size();
            try
            {
                payloadOut.Write(Sharpen.Runtime.GetBytesForString(mapData));
            }
            finally
            {
                payloadOut.Close();
            }
            int dataSize = serialized.Size() - headerSize;

            // Ensure that the OnDiskMapOutput shuffler can successfully read the data.
            MapHost              host      = new MapHost("TestHost", "http://test/url");
            ByteArrayInputStream shuffleIn = new ByteArrayInputStream(serialized.ToByteArray());
            try
            {
                // Read past the shuffle header.
                shuffleIn.Read(new byte[headerSize], 0, headerSize);
                diskOutput.Shuffle(host, shuffleIn, dataSize, dataSize, metrics, Reporter.Null);
            }
            finally
            {
                shuffleIn.Close();
            }

            // Now corrupt the IFile data.
            byte[] corruptedBytes = serialized.ToByteArray();
            int    corruptAt      = headerSize + (dataSize / 2);
            corruptedBytes[corruptAt] = unchecked ((int)(0x0));
            try
            {
                shuffleIn = new ByteArrayInputStream(corruptedBytes);
                // Read past the shuffle header.
                shuffleIn.Read(new byte[headerSize], 0, headerSize);
                diskOutput.Shuffle(host, shuffleIn, dataSize, dataSize, metrics, Reporter.Null);
                NUnit.Framework.Assert.Fail("OnDiskMapOutput.shuffle didn't detect the corrupted map partition file"
                                            );
            }
            catch (ChecksumException e)
            {
                Log.Info("The expected checksum exception was thrown.", e);
            }
            finally
            {
                shuffleIn.Close();
            }

            // Ensure that the shuffled file can be read.
            IFileInputStream verifyIn = new IFileInputStream(fs.Open(shuffledToDisk), dataSize, job);
            try
            {
                verifyIn.Read(new byte[dataSize], 0, dataSize);
            }
            finally
            {
                verifyIn.Close();
            }
        }
Exemplo n.º 23
0
 /// <summary>
 /// Creates a penalty for <paramref name="host"/> that ends
 /// <paramref name="delay"/> after the current monotonic time.
 /// </summary>
 /// <param name="host">Penalized host.</param>
 /// <param name="delay">Penalty duration (presumably milliseconds - confirm against callers).</param>
 internal Penalty(MapHost host, long delay)
 {
     this.host    = host;
     // The end time is fixed at construction from the monotonic clock.
     this.endTime = Time.MonotonicNow() + delay;
 }