Пример #1
0
 /// <exception cref="System.IO.IOException"/>
 private static JobSplit.SplitMetaInfo[] WriteOldSplits(InputSplit[] splits, FSDataOutputStream
                                                        @out, Configuration conf)
 {
     JobSplit.SplitMetaInfo[] info = new JobSplit.SplitMetaInfo[splits.Length];
     if (splits.Length != 0)
     {
         int  i                 = 0;
         long offset            = @out.GetPos();
         int  maxBlockLocations = conf.GetInt(MRConfig.MaxBlockLocationsKey, MRConfig.MaxBlockLocationsDefault
                                              );
         foreach (InputSplit split in splits)
         {
             long prevLen = @out.GetPos();
             Text.WriteString(@out, split.GetType().FullName);
             split.Write(@out);
             long     currLen   = @out.GetPos();
             string[] locations = split.GetLocations();
             if (locations.Length > maxBlockLocations)
             {
                 Log.Warn("Max block location exceeded for split: " + split + " splitsize: " + locations
                          .Length + " maxsize: " + maxBlockLocations);
                 locations = Arrays.CopyOf(locations, maxBlockLocations);
             }
             info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.GetLength());
             offset   += currLen - prevLen;
         }
     }
     return(info);
 }
Пример #2
0
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 private static JobSplit.SplitMetaInfo[] WriteNewSplits <T>(Configuration conf, T[]
                                                            array, FSDataOutputStream @out)
     where T : InputSplit
 {
     JobSplit.SplitMetaInfo[] info = new JobSplit.SplitMetaInfo[array.Length];
     if (array.Length != 0)
     {
         SerializationFactory factory = new SerializationFactory(conf);
         int i = 0;
         int maxBlockLocations = conf.GetInt(MRConfig.MaxBlockLocationsKey, MRConfig.MaxBlockLocationsDefault
                                             );
         long offset = @out.GetPos();
         foreach (T split in array)
         {
             long prevCount = @out.GetPos();
             Text.WriteString(@out, split.GetType().FullName);
             Org.Apache.Hadoop.IO.Serializer.Serializer <T> serializer = factory.GetSerializer(
                 (Type)split.GetType());
             serializer.Open(@out);
             serializer.Serialize(split);
             long     currCount = @out.GetPos();
             string[] locations = split.GetLocations();
             if (locations.Length > maxBlockLocations)
             {
                 Log.Warn("Max block location exceeded for split: " + split + " splitsize: " + locations
                          .Length + " maxsize: " + maxBlockLocations);
                 locations = Arrays.CopyOf(locations, maxBlockLocations);
             }
             info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.GetLength());
             offset   += currCount - prevCount;
         }
     }
     return(info);
 }
Пример #3
0
        /// <exception cref="System.IO.IOException"/>
        public static JobSplit.TaskSplitMetaInfo[] ReadSplitMetaInfo(JobID jobId, FileSystem
                                                                     fs, Configuration conf, Path jobSubmitDir)
        {
            long maxMetaInfoSize = conf.GetLong(MRJobConfig.SplitMetainfoMaxsize, MRJobConfig
                                                .DefaultSplitMetainfoMaxsize);
            Path       metaSplitFile = JobSubmissionFiles.GetJobSplitMetaFile(jobSubmitDir);
            string     jobSplitFile  = JobSubmissionFiles.GetJobSplitFile(jobSubmitDir).ToString();
            FileStatus fStatus       = fs.GetFileStatus(metaSplitFile);

            if (maxMetaInfoSize > 0 && fStatus.GetLen() > maxMetaInfoSize)
            {
                throw new IOException("Split metadata size exceeded " + maxMetaInfoSize + ". Aborting job "
                                      + jobId);
            }
            FSDataInputStream @in = fs.Open(metaSplitFile);

            byte[] header = new byte[JobSplit.MetaSplitFileHeader.Length];
            @in.ReadFully(header);
            if (!Arrays.Equals(JobSplit.MetaSplitFileHeader, header))
            {
                throw new IOException("Invalid header on split file");
            }
            int vers = WritableUtils.ReadVInt(@in);

            if (vers != JobSplit.MetaSplitVersion)
            {
                @in.Close();
                throw new IOException("Unsupported split version " + vers);
            }
            int numSplits = WritableUtils.ReadVInt(@in);

            //TODO: check for insane values
            JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits
                                                            ];
            for (int i = 0; i < numSplits; i++)
            {
                JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
                splitMetaInfo.ReadFields(@in);
                JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(jobSplitFile, splitMetaInfo
                                                                                 .GetStartOffset());
                allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, splitMetaInfo.GetLocations
                                                                         (), splitMetaInfo.GetInputDataLength());
            }
            @in.Close();
            return(allSplitMetaInfo);
        }