public override int ReadChunk(out InputTextCunk chunk, int max_char_count)
{
    var list = new List<string>(max_char_count);
    int count = 0;

    // Accumulate whole records until the character budget is exceeded or the input runs out.
    for (int i = 0; i < max_char_count; i++)
    {
        var s = ReadRecord();
        if (s == null)
        {
            break;
        }
        count += s.Length;
        list.Add(s);
        if (count > max_char_count)
        {
            break;
        }
    }

    chunk = new InputTextCunk(list, count);
    return count;
}
private IEnumerable<Dictionary<InterKey, List<InterValue>>> doMap(InputTextCunk chunk, int thread_num = 0)
{
    Stopwatch watch = new Stopwatch();
    var input_records = chunk.Records;
    var char_count = chunk.CharCount;
    watch.Restart();

    // Each parallel partition fills its own dictionary, so no locking is needed while mapping.
    var dics = new ConcurrentBag<Dictionary<InterKey, List<InterValue>>>();
    ParallelOptions option = new ParallelOptions();
    if (thread_num != 0)
    {
        option.MaxDegreeOfParallelism = thread_num;
    }

    Parallel.ForEach(Partitioner.Create(0, input_records.Count), option, (range) =>
    {
        var dic = new Dictionary<InterKey, List<InterValue>>();
        var context = new MapContext<InterKey, InterValue>(dic);
        for (int i = range.Item1; i < range.Item2; i++)
        {
            mapFunc.Invoke(input_records[i], context);
        }
        dics.Add(dic);
        Interlocked.Add(ref mapperInfo.MapEmits, context.EmitCount);
    });

    watch.Stop();
    mapperInfo.ProcessedRecords += input_records.Count;
    mapperInfo.ProcessedChars += char_count;

    // Adapt the chunk size so that each mapping pass stays within the target work period.
    if (watch.Elapsed > maxWorkPeriod)
    {
        maxChunkSize = Math.Min(maxChunkSize / 2, maxCharsToMap);
    }
    if (watch.Elapsed < minWorkPeriod)
    {
        maxChunkSize = Math.Min(maxChunkSize * 2, maxCharsToMap);
    }

    logger.Debug("Mapped a chunk with {0} chars in {1}", StringFormatter.DigitGrouped(char_count), watch.Elapsed);
    return dics;
}
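A minimal, self-contained sketch of the adaptive chunk sizing that doMap applies: after each pass the elapsed time is compared against a target window, and the character budget for the next chunk is halved or doubled, capped at maxCharsToMap. The constants and the SimulateMapPass helper below are hypothetical stand-ins chosen for illustration; only the feedback rule mirrors the code above.

using System;
using System.Diagnostics;
using System.Threading;

class ChunkSizeTuningSketch
{
    // Assumed tuning values; the real ones live as fields of the mapper class above.
    static readonly TimeSpan minWorkPeriod = TimeSpan.FromMilliseconds(200);
    static readonly TimeSpan maxWorkPeriod = TimeSpan.FromMilliseconds(800);
    static int maxChunkSize = 64 * 1024;    // current character budget per chunk
    const int maxCharsToMap = 1024 * 1024;  // hard upper bound on the budget

    static void Main()
    {
        var watch = new Stopwatch();
        for (int pass = 0; pass < 5; pass++)
        {
            watch.Restart();
            SimulateMapPass(maxChunkSize);  // stands in for the Parallel.ForEach over a chunk
            watch.Stop();

            // Same feedback rule as doMap: shrink when a pass runs long, grow when it runs short.
            if (watch.Elapsed > maxWorkPeriod)
                maxChunkSize = Math.Min(maxChunkSize / 2, maxCharsToMap);
            if (watch.Elapsed < minWorkPeriod)
                maxChunkSize = Math.Min(maxChunkSize * 2, maxCharsToMap);

            Console.WriteLine("pass {0}: {1} ms, next budget {2} chars",
                pass, watch.ElapsedMilliseconds, maxChunkSize);
        }
    }

    // Pretend that mapping time grows roughly linearly with the number of characters.
    static void SimulateMapPass(int chars)
    {
        Thread.Sleep(chars / 1000);
    }
}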