/// <summary>
/// Writes the buffered bytes of every partition, one after another, into a
/// single map output file and then writes the matching spill index file.
/// </summary>
/// <remarks>
/// For each partition the bytes of <c>outStreams[i]</c> are written through a
/// fresh <c>IFileOutputStream</c> wrapped around the shared raw stream. The
/// start offset, raw length and on-disk length of the partition are recorded
/// in the spill record. The raw stream is closed even when writing fails, so
/// the file handle is not leaked.
/// </remarks>
/// <param name="mapOutputPath">Path of the file that receives the partition data.</param>
/// <param name="indexPath">Path of the file that receives the spill index.</param>
/// <exception cref="System.IO.IOException"/>
private void CopyPartitions(Path mapOutputPath, Path indexPath)
{
    FileSystem localFs = FileSystem.GetLocal(jobConf);
    FileSystem rfs = ((LocalFileSystem)localFs).GetRaw();
    FSDataOutputStream rawOutput = rfs.Create(mapOutputPath, true, BufSize);
    SpillRecord spillRecord = new SpillRecord(numberOfPartitions);
    // A single record instance is reused for every partition; PutIndex is
    // expected to copy the field values out of it (TODO confirm).
    IndexRecord indexRecord = new IndexRecord();
    try
    {
        for (int i = 0; i < numberOfPartitions; i++)
        {
            indexRecord.startOffset = rawOutput.GetPos();
            byte[] buffer = outStreams[i].ToByteArray();
            IFileOutputStream checksumOutput = new IFileOutputStream(rawOutput);
            checksumOutput.Write(buffer);
            // Write checksum.
            checksumOutput.Finish();
            // Write index record. The part length is measured from the raw
            // stream position, so it covers everything Finish() appended.
            indexRecord.rawLength = (long)buffer.Length;
            indexRecord.partLength = rawOutput.GetPos() - indexRecord.startOffset;
            spillRecord.PutIndex(indexRecord, i);
            reporter.Progress();
        }
    }
    finally
    {
        // Close on both the success and the failure path.
        rawOutput.Close();
    }
    spillRecord.WriteToFile(indexPath, jobConf);
}
/// <summary>
/// Creates a holder that pairs a map output file path with its index record.
/// </summary>
/// <param name="_enclosing">The <c>Shuffle</c> instance this object is attached to.</param>
/// <param name="mapOutputFileName">Path of the map output file.</param>
/// <param name="indexRecord">Index record kept alongside the path.</param>
internal MapOutputInfo(Shuffle _enclosing, Path mapOutputFileName, IndexRecord indexRecord)
{
    // Plain field captures; no validation is performed on the arguments.
    this.indexRecord = indexRecord;
    this.mapOutputFileName = mapOutputFileName;
    this._enclosing = _enclosing;
}
/// <summary>
/// Computes the total content length of the response for the requested map
/// outputs and sets the response headers accordingly.
/// </summary>
/// <remarks>
/// For every map id the map output info is resolved through
/// <c>GetMapOutputInfo</c>. It is stored in <paramref name="mapOutputInfoMap"/>
/// while that map holds fewer than <c>mapOutputMetaInfoCacheSize</c> entries.
/// The content length is the sum, over all map ids, of the partition length
/// and the serialized shuffle header length.
/// </remarks>
/// <param name="mapIds">Identifiers of the map outputs being requested.</param>
/// <param name="outputBaseStr">Prefix prepended to each map id to form its base path string.</param>
/// <param name="user">User name passed on to the index lookup.</param>
/// <param name="reduce">Reduce partition number.</param>
/// <param name="request">Incoming request; not read by this method.</param>
/// <param name="response">Response whose headers are set.</param>
/// <param name="keepAliveParam">Keep-alive flag forwarded to <c>SetResponseHeaders</c>.</param>
/// <param name="mapOutputInfoMap">Receives the resolved map output infos, up to the configured size.</param>
/// <exception cref="System.IO.IOException"/>
protected internal virtual void PopulateHeaders(IList<string> mapIds, string outputBaseStr, string user, int reduce, HttpRequest request, HttpResponse response, bool keepAliveParam, IDictionary<string, ShuffleHandler.Shuffle.MapOutputInfo> mapOutputInfoMap)
{
    long contentLength = 0;
    foreach (string mapId in mapIds)
    {
        string @base = outputBaseStr + mapId;
        ShuffleHandler.Shuffle.MapOutputInfo outputInfo = this.GetMapOutputInfo(@base, mapId, reduce, user);
        if (mapOutputInfoMap.Count < this._enclosing.mapOutputMetaInfoCacheSize)
        {
            mapOutputInfoMap[mapId] = outputInfo;
        }

        // GetMapOutputInfo has already looked up the index record, so reuse
        // it instead of resolving the index path and querying the cache a
        // second time for every map id.
        IndexRecord info;
        if (outputInfo != null && outputInfo.indexRecord != null)
        {
            info = outputInfo.indexRecord;
        }
        else
        {
            // Fallback for overrides of GetMapOutputInfo that do not supply
            // an index record: perform the lookup directly.
            Path indexFileName = this.lDirAlloc.GetLocalPathToRead(@base + "/file.out.index", this.conf);
            info = this.indexCache.GetIndexInformation(mapId, reduce, indexFileName, user);
        }

        // Serialize the header only to learn how many bytes it occupies.
        ShuffleHeader header = new ShuffleHeader(mapId, info.partLength, info.rawLength, reduce);
        DataOutputBuffer dob = new DataOutputBuffer();
        header.Write(dob);
        contentLength += info.partLength;
        contentLength += dob.GetLength();
    }

    // Now set the response headers.
    this.SetResponseHeaders(response, keepAliveParam, contentLength);
}
/// <summary>
/// Stores the start offset, raw length and part length of
/// <paramref name="rec"/> in three consecutive slots of the entries buffer.
/// </summary>
/// <param name="rec">Record whose three fields are copied.</param>
/// <param name="partition">Partition number used to compute the first slot.</param>
public virtual void PutIndex(IndexRecord rec, int partition)
{
    // First slot of this partition. The division by 8 presumably converts a
    // record length in bytes into a count of 8-byte entries (TODO confirm).
    int offsetSlot = partition * MapTask.MapOutputIndexRecordLength / 8;
    int rawLengthSlot = offsetSlot + 1;
    int partLengthSlot = offsetSlot + 2;

    entries.Put(offsetSlot, rec.startOffset);
    entries.Put(rawLengthSlot, rec.rawLength);
    entries.Put(partLengthSlot, rec.partLength);
}
/// <summary>
/// Writes the shuffle header for one map output to the channel, followed by
/// the partition bytes read from the map output file.
/// </summary>
/// <remarks>
/// The header is written before the map output file is opened. If the file is
/// then not found, the event is logged and <c>null</c> is returned, although
/// the header has already been sent. When the pipeline has no
/// <c>SslHandler</c> the bytes are sent as a <c>FadvisedFileRegion</c>;
/// otherwise they are sent as a <c>FadvisedChunkedFile</c>.
/// </remarks>
/// <param name="ctx">Channel handler context; not read by this method.</param>
/// <param name="ch">Channel that the header and data are written to.</param>
/// <param name="user">User name passed to the secure file open.</param>
/// <param name="mapId">Map identifier placed in the header.</param>
/// <param name="reduce">Reduce partition number placed in the header.</param>
/// <param name="mapOutputInfo">Supplies the map output file path and its index record.</param>
/// <returns>The future of the data write, or <c>null</c> if the map output file was not found.</returns>
/// <exception cref="System.IO.IOException"/>
protected internal virtual ChannelFuture SendMapOutput(ChannelHandlerContext ctx, Org.Jboss.Netty.Channel.Channel ch, string user, string mapId, int reduce, ShuffleHandler.Shuffle.MapOutputInfo mapOutputInfo)
{
    IndexRecord index = mapOutputInfo.indexRecord;

    // Serialize and send the header first.
    ShuffleHeader shuffleHeader = new ShuffleHeader(mapId, index.partLength, index.rawLength, reduce);
    DataOutputBuffer headerBuffer = new DataOutputBuffer();
    shuffleHeader.Write(headerBuffer);
    ch.Write(ChannelBuffers.WrappedBuffer(headerBuffer.GetData(), 0, headerBuffer.GetLength()));

    // Open the map output file for reading on behalf of the user.
    FilePath spillFile = new FilePath(mapOutputInfo.mapOutputFileName.ToString());
    RandomAccessFile spillData;
    try
    {
        spillData = SecureIOUtils.OpenForRandomRead(spillFile, "r", user, null);
    }
    catch (FileNotFoundException)
    {
        ShuffleHandler.Log.Info(spillFile + " not found");
        return null;
    }

    bool sslInPipeline = ch.GetPipeline().Get<SslHandler>() != null;
    string spillPath = spillFile.GetAbsolutePath();
    ChannelFuture result;
    if (sslInPipeline)
    {
        // TODO error handling; distinguish IO/connection failures,
        // attribute to appropriate spill output
        // HTTPS cannot be done with zero copy.
        FadvisedChunkedFile chunkedFile = new FadvisedChunkedFile(
            spillData,
            index.startOffset,
            index.partLength,
            this._enclosing.sslFileBufferSize,
            this._enclosing.manageOsCache,
            this._enclosing.readaheadLength,
            this._enclosing.readaheadPool,
            spillPath);
        result = ch.Write(chunkedFile);
    }
    else
    {
        FadvisedFileRegion region = new FadvisedFileRegion(
            spillData,
            index.startOffset,
            index.partLength,
            this._enclosing.manageOsCache,
            this._enclosing.readaheadLength,
            this._enclosing.readaheadPool,
            spillPath,
            this._enclosing.shuffleBufferSize,
            this._enclosing.shuffleTransferToAllowed);
        result = ch.Write(region);
        // The listener receives the region; presumably it releases it once
        // the write completes (TODO confirm).
        result.AddListener(new _ChannelFutureListener_1135(region));
    }

    this._enclosing.metrics.shuffleConnections.Incr();
    // optimistic
    this._enclosing.metrics.shuffleOutputBytes.Incr(index.partLength);
    return result;
}
/// <summary>
/// Resolves the index record and the map output file path for one map output
/// and returns them bundled in a <c>MapOutputInfo</c>.
/// </summary>
/// <param name="base">Base path string; "/file.out.index" and "/file.out" are appended to it.</param>
/// <param name="mapId">Map identifier used for the index lookup.</param>
/// <param name="reduce">Reduce partition number used for the index lookup.</param>
/// <param name="user">User name passed on to the index lookup.</param>
/// <returns>A new <c>MapOutputInfo</c> holding the output path and the index record.</returns>
/// <exception cref="System.IO.IOException"/>
protected internal virtual ShuffleHandler.Shuffle.MapOutputInfo GetMapOutputInfo(string @base, string mapId, int reduce, string user)
{
    // Index file
    Path indexPath = this.lDirAlloc.GetLocalPathToRead(@base + "/file.out.index", this.conf);
    IndexRecord record = this.indexCache.GetIndexInformation(mapId, reduce, indexPath, user);

    // Map output file
    Path outputPath = this.lDirAlloc.GetLocalPathToRead(@base + "/file.out", this.conf);

    if (ShuffleHandler.Log.IsDebugEnabled())
    {
        ShuffleHandler.Log.Debug(@base + " : " + outputPath + " : " + indexPath);
    }

    return new ShuffleHandler.Shuffle.MapOutputInfo(this, outputPath, record);
}
/// <summary>
/// Asserts that the start offset, raw length and part length of
/// <paramref name="rec"/> all equal <paramref name="fill"/>.
/// </summary>
/// <param name="rec">Record under test.</param>
/// <param name="fill">Value that every field is expected to hold.</param>
private static void CheckRecord(IndexRecord rec, long fill)
{
    // Checked in the same order as the fields are listed: offset, raw, part.
    long[] actualValues = new long[] { rec.startOffset, rec.rawLength, rec.partLength };
    foreach (long actual in actualValues)
    {
        NUnit.Framework.Assert.AreEqual(fill, actual);
    }
}
/// <summary>
/// Exercises the eviction behaviour of <c>IndexCache</c>: fills the cache,
/// checks that cached entries survive deletion of their files, then adds one
/// more entry and checks that the first entry is gone and all others remain.
/// </summary>
/// <remarks>
/// Index files are named after their size written in base 36, and the same
/// string is used as the map id. The random seed is printed so that a failing
/// run can be reproduced.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestLRCPolicy()
{
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);
    System.Console.Out.WriteLine("seed: " + seed);
    fs.Delete(p, true);
    // Presumably limits the cache to 1 MB, matching the 1024 * 1024 bound
    // used by the loops below (TODO confirm the unit of this setting).
    conf.SetInt(TTConfig.TtIndexCache, 1);
    int partsPerMap = 1000;
    // 24 is presumably the size in bytes of one index record (TODO confirm).
    int bytesPerFile = partsPerMap * 24;
    IndexCache cache = new IndexCache(conf);
    // The current user does not change during the test; look it up once.
    string user = UserGroupInformation.GetCurrentUser().GetShortUserName();

    // fill cache
    int totalsize = bytesPerFile;
    for (; totalsize < 1024 * 1024; totalsize += bytesPerFile)
    {
        Path f = new Path(p, Sharpen.Extensions.ToString(totalsize, 36));
        WriteFile(fs, f, totalsize, partsPerMap);
        IndexRecord rec = cache.GetIndexInformation(Sharpen.Extensions.ToString(totalsize, 36), r.Next(partsPerMap), f, user);
        CheckRecord(rec, totalsize);
    }

    // delete files, ensure cache retains all elem
    foreach (FileStatus stat in fs.ListStatus(p))
    {
        fs.Delete(stat.GetPath(), true);
    }
    for (int i = bytesPerFile; i < 1024 * 1024; i += bytesPerFile)
    {
        Path f = new Path(p, Sharpen.Extensions.ToString(i, 36));
        IndexRecord rec = cache.GetIndexInformation(Sharpen.Extensions.ToString(i, 36), r.Next(partsPerMap), f, user);
        CheckRecord(rec, i);
    }

    // push oldest (bytesPerFile) out of cache
    Path f_1 = new Path(p, Sharpen.Extensions.ToString(totalsize, 36));
    WriteFile(fs, f_1, totalsize, partsPerMap);
    cache.GetIndexInformation(Sharpen.Extensions.ToString(totalsize, 36), r.Next(partsPerMap), f_1, user);
    fs.Delete(f_1, false);

    // oldest fails to read, or error
    bool fnf = false;
    try
    {
        // Use the base-36 name so the lookup targets the deleted index file
        // of the evicted entry, consistent with every other path in the test.
        cache.GetIndexInformation(Sharpen.Extensions.ToString(bytesPerFile, 36), r.Next(partsPerMap), new Path(p, Sharpen.Extensions.ToString(bytesPerFile, 36)), user);
    }
    catch (IOException e)
    {
        // Only an IOException caused by a missing file proves the eviction.
        if (e.InnerException == null || !(e.InnerException is FileNotFoundException))
        {
            throw;
        }
        else
        {
            fnf = true;
        }
    }
    if (!fnf)
    {
        Fail("Failed to push out last entry");
    }

    // should find all the other entries
    for (int i_1 = bytesPerFile << 1; i_1 < 1024 * 1024; i_1 += bytesPerFile)
    {
        IndexRecord rec = cache.GetIndexInformation(Sharpen.Extensions.ToString(i_1, 36), r.Next(partsPerMap), new Path(p, Sharpen.Extensions.ToString(i_1, 36)), user);
        CheckRecord(rec, i_1);
    }
    IndexRecord rec_1 = cache.GetIndexInformation(Sharpen.Extensions.ToString(totalsize, 36), r.Next(partsPerMap), f_1, user);
    CheckRecord(rec_1, totalsize);
}