/// <summary>
/// Partitioning evenly ensures that each processor used by PLinq will deal
/// with a partition of equal "weight". In this case, we make sure each
/// partition contains not only the same amount of files, but also (as close
/// to as possible) the same amount of "bytes". For example, if we have 100
/// files totaling 32MB and 4 processors, we will end up with 4 partitions
/// of (exactly) 25 files totalling (approximately) 8MB each.
///
/// Note: The code inside this method is not the cleanest, but it is
/// written in a way that tries to minimize the # of large array allocations.
/// </summary>
public static IList<FileContentsPiece> CreateFilePieces(ICollection<FileWithContents> files) {
  var filesWithContents = FilterFilesWithContents(files);

  // Factory for file identifiers: large files consume ids during the first
  // pass below, small files during the second pass. The order is load-bearing.
  var currentFileId = 0;
  Func<int> fileIdFactory = () => currentFileId++;

  // Predicate to figure out if a file is "small" (fits in a single chunk).
  Func<FileWithContents, bool> isSmallFile = x => x.Contents.ByteLength <= ChunkSize;

  // Count the total # of small and large files, while splitting large files
  // into their fragments. Capacity is a heuristic: assume ~1% of files are large.
  var smallFilesCount = 0;
  var largeFiles = new List<FileContentsPiece>(filesWithContents.Count / 100);
  foreach (var fileData in filesWithContents) {
    if (isSmallFile(fileData)) {
      smallFilesCount++;
    } else {
      var splitFileContents = SplitFileContents(fileData, fileIdFactory());
      largeFiles.AddRange(splitFileContents);
    }
  }
  var totalFileCount = smallFilesCount + largeFiles.Count;

  // Store elements in their partitions.
  // # of partitions = # of logical processors.
  var filePieces = new FileContentsPiece[totalFileCount];
  var partitionCount = Environment.ProcessorCount;
  var generator = new PartitionIndicesGenerator(totalFileCount, partitionCount);

  // Store large files first so their fragments spread across partitions.
  foreach (var item in largeFiles) {
    filePieces[generator.Next()] = item;
  }
  // Store small files (one piece per file).
  foreach (var fileData in filesWithContents) {
    if (isSmallFile(fileData)) {
      var item = fileData.Contents.CreatePiece(
          fileData.FileName,
          fileIdFactory(),
          fileData.Contents.TextRange);
      filePieces[generator.Next()] = item;
    }
  }
  FileDatabaseDebugLogger.LogFilePieces(filesWithContents, filePieces, partitionCount);
  return filePieces;
}
/// <summary>
/// Distributes file pieces so that each PLinq processor receives a partition
/// of equal "weight": the same number of pieces per partition and, as close
/// as possible, the same number of bytes. For example, 100 files totaling
/// 32MB split across 4 processors yields 4 partitions of (exactly) 25 files
/// totalling (approximately) 8MB each.
///
/// Note: The implementation deliberately trades some readability for fewer
/// large array allocations.
/// </summary>
private static IList<IFileContentsPiece> CreateFilePieces(ICollection<FileData> filesWithContents) {
  // Hands out sequential file identifiers. Large files consume ids during
  // the first pass, small files during the second pass.
  var nextFileId = 0;
  int AllocateFileId() => nextFileId++;

  // A "small" file fits entirely within a single chunk.
  bool FitsInOneChunk(FileData x) => x.Contents.ByteLength <= ChunkSize;

  // First pass: count small files and split large files into fragments.
  // Capacity is a heuristic (roughly 1% of files expected to be large).
  var smallFileCount = 0;
  var largeFilePieces = new List<FileContentsPiece>(filesWithContents.Count / 100);
  foreach (var fileData in filesWithContents) {
    if (FitsInOneChunk(fileData)) {
      smallFileCount++;
    } else {
      largeFilePieces.AddRange(SplitFileContents(fileData, AllocateFileId()));
    }
  }
  var pieceCount = smallFileCount + largeFilePieces.Count;

  // Scatter pieces across partitions; one partition per logical processor.
  var fileContents = new FileContentsPiece[pieceCount];
  var partitionCount = Environment.ProcessorCount;
  var indices = new PartitionIndicesGenerator(pieceCount, partitionCount);

  // Place large-file fragments first...
  foreach (var piece in largeFilePieces) {
    fileContents[indices.Next()] = piece;
  }
  // ...then one piece per small file.
  foreach (var fileData in filesWithContents) {
    if (FitsInOneChunk(fileData)) {
      fileContents[indices.Next()] = fileData.Contents.CreatePiece(
          fileData.FileName,
          AllocateFileId(),
          fileData.Contents.TextRange);
    }
  }

  if (LogPiecesStats) {
    // Sanity: every slot filled, and total piece bytes equal total file bytes.
    Debug.Assert(fileContents.All(x => x != null));
    Debug.Assert(fileContents.Aggregate(0L, (c, x) => c + x.ByteLength) ==
                 filesWithContents.Aggregate(0L, (c, x) => c + x.Contents.ByteLength));
    fileContents.GetPartitionRanges(partitionCount).ForAll(
        (index, range) => {
          Logger.LogInfo("Partition {0} has a weight of {1:n0}",
              index,
              fileContents
                  .Skip(range.Key)
                  .Take(range.Value)
                  .Aggregate(0L, (c, x) => c + x.ByteLength));
        });
  }
  return fileContents;
}