/// <summary>
/// Serializes the given old-API (mapred) splits to <paramref name="out"/> and
/// builds the corresponding split meta-info records. For each split, the split
/// class name and the split body are written; the meta-info captures the split's
/// byte offset in the stream, its (possibly truncated) host locations, and its
/// input data length.
/// </summary>
/// <param name="splits">Old-API input splits to serialize; may be empty.</param>
/// <param name="out">Destination stream; its current position is the first split's offset.</param>
/// <param name="conf">Job configuration, consulted for the max-block-locations cap.</param>
/// <returns>One <see cref="JobSplit.SplitMetaInfo"/> per input split, in order.</returns>
/// <exception cref="System.IO.IOException"/>
private static JobSplit.SplitMetaInfo[] WriteOldSplits(InputSplit[] splits, FSDataOutputStream
    @out, Configuration conf)
{
    JobSplit.SplitMetaInfo[] metaInfo = new JobSplit.SplitMetaInfo[splits.Length];
    if (splits.Length == 0)
    {
        return metaInfo;
    }
    int locationCap = conf.GetInt(MRConfig.MaxBlockLocationsKey, MRConfig.MaxBlockLocationsDefault
        );
    long splitOffset = @out.GetPos();
    for (int idx = 0; idx < splits.Length; idx++)
    {
        InputSplit split = splits[idx];
        // Record stream positions around the write so we can advance the
        // offset by exactly the number of bytes this split consumed.
        long posBefore = @out.GetPos();
        Text.WriteString(@out, split.GetType().FullName);
        split.Write(@out);
        long posAfter = @out.GetPos();
        string[] hosts = split.GetLocations();
        if (hosts.Length > locationCap)
        {
            // Too many replica locations bloats the meta file; keep only the cap.
            Log.Warn("Max block location exceeded for split: " + split + " splitsize: " + hosts
                .Length + " maxsize: " + locationCap);
            hosts = Arrays.CopyOf(hosts, locationCap);
        }
        metaInfo[idx] = new JobSplit.SplitMetaInfo(hosts, splitOffset, split.GetLength());
        splitOffset += posAfter - posBefore;
    }
    return metaInfo;
}
/// <summary>
/// Serializes the given new-API (mapreduce) splits to <paramref name="out"/> via
/// the configured <see cref="SerializationFactory"/> and builds the corresponding
/// split meta-info records. For each split, the split class name and serialized
/// body are written; the meta-info captures the split's byte offset in the stream,
/// its (possibly truncated) host locations, and its input data length.
/// </summary>
/// <param name="conf">Job configuration; supplies the serializer and the max-block-locations cap.</param>
/// <param name="array">New-API input splits to serialize; may be empty.</param>
/// <param name="out">Destination stream; its current position is the first split's offset.</param>
/// <returns>One <see cref="JobSplit.SplitMetaInfo"/> per input split, in order.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private static JobSplit.SplitMetaInfo[] WriteNewSplits<T>(Configuration conf, T[]
    array, FSDataOutputStream @out)
    where T : InputSplit
{
    JobSplit.SplitMetaInfo[] metaInfo = new JobSplit.SplitMetaInfo[array.Length];
    if (array.Length == 0)
    {
        return metaInfo;
    }
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    int locationCap = conf.GetInt(MRConfig.MaxBlockLocationsKey, MRConfig.MaxBlockLocationsDefault
        );
    long splitOffset = @out.GetPos();
    for (int idx = 0; idx < array.Length; idx++)
    {
        T split = array[idx];
        // Record stream positions around the write so we can advance the
        // offset by exactly the number of bytes this split consumed.
        long posBefore = @out.GetPos();
        Text.WriteString(@out, split.GetType().FullName);
        Org.Apache.Hadoop.IO.Serializer.Serializer<T> serializer = serializationFactory.GetSerializer(
            (Type)split.GetType());
        serializer.Open(@out);
        serializer.Serialize(split);
        long posAfter = @out.GetPos();
        string[] hosts = split.GetLocations();
        if (hosts.Length > locationCap)
        {
            // Too many replica locations bloats the meta file; keep only the cap.
            Log.Warn("Max block location exceeded for split: " + split + " splitsize: " + hosts
                .Length + " maxsize: " + locationCap);
            hosts = Arrays.CopyOf(hosts, locationCap);
        }
        metaInfo[idx] = new JobSplit.SplitMetaInfo(hosts, splitOffset, split.GetLength());
        splitOffset += posAfter - posBefore;
    }
    return metaInfo;
}
/// <summary>
/// Reads the job's split meta-info file from <paramref name="jobSubmitDir"/> and
/// returns per-task split meta-info. Validates the meta file's size (against
/// <c>MRJobConfig.SplitMetainfoMaxsize</c>, unless that limit is non-positive),
/// its magic header, and its version before reading the entries.
/// </summary>
/// <param name="jobId">Job id, used only in error messages.</param>
/// <param name="fs">Filesystem holding the submit directory.</param>
/// <param name="conf">Job configuration, consulted for the max meta-info size.</param>
/// <param name="jobSubmitDir">Directory containing the split and split-meta files.</param>
/// <returns>One <see cref="JobSplit.TaskSplitMetaInfo"/> per split, in file order.</returns>
/// <exception cref="System.IO.IOException"/>
public static JobSplit.TaskSplitMetaInfo[] ReadSplitMetaInfo(JobID jobId, FileSystem
    fs, Configuration conf, Path jobSubmitDir)
{
    long maxMetaInfoSize = conf.GetLong(MRJobConfig.SplitMetainfoMaxsize, MRJobConfig
        .DefaultSplitMetainfoMaxsize);
    Path metaSplitFile = JobSubmissionFiles.GetJobSplitMetaFile(jobSubmitDir);
    string jobSplitFile = JobSubmissionFiles.GetJobSplitFile(jobSubmitDir).ToString();
    FileStatus fStatus = fs.GetFileStatus(metaSplitFile);
    if (maxMetaInfoSize > 0 && fStatus.GetLen() > maxMetaInfoSize)
    {
        throw new IOException("Split metadata size exceeded " + maxMetaInfoSize + ". Aborting job "
            + jobId);
    }
    FSDataInputStream @in = fs.Open(metaSplitFile);
    // try/finally fixes a resource leak in the original: the stream was not
    // closed on the invalid-header throw path, nor when ReadVInt/ReadFields
    // threw mid-read. The finally guarantees Close() on every exit path.
    try
    {
        byte[] header = new byte[JobSplit.MetaSplitFileHeader.Length];
        @in.ReadFully(header);
        if (!Arrays.Equals(JobSplit.MetaSplitFileHeader, header))
        {
            throw new IOException("Invalid header on split file");
        }
        int vers = WritableUtils.ReadVInt(@in);
        if (vers != JobSplit.MetaSplitVersion)
        {
            throw new IOException("Unsupported split version " + vers);
        }
        int numSplits = WritableUtils.ReadVInt(@in);
        //TODO: check for insane values
        JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits
            ];
        for (int i = 0; i < numSplits; i++)
        {
            JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
            splitMetaInfo.ReadFields(@in);
            // Each task records the shared split file plus its own offset into it.
            JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(jobSplitFile, splitMetaInfo
                .GetStartOffset());
            allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, splitMetaInfo.GetLocations
                (), splitMetaInfo.GetInputDataLength());
        }
        return allSplitMetaInfo;
    }
    finally
    {
        @in.Close();
    }
}