Example #1
0
        /// <summary>
        /// Partitioning evenly ensures that each processor used by PLinq will deal
        /// with a partition of equal "weight". In this case, we make sure each
        /// partition contains not only the same amount of files, but also (as close
        /// to as possible) the same amount of "bytes". For example, if we have 100
        /// files totaling 32MB and 4 processors, we will end up with 4 partitions
        /// of (exactly) 25 files totalling (approximately) 8MB each.
        ///
        /// Note: The code inside this method is not the cleanest, but it is
        /// written in a way that tries to minimize the # of large array allocations.
        /// </summary>
        /// <param name="files">The candidate files; entries without contents are
        /// removed by <c>FilterFilesWithContents</c> before partitioning.</param>
        /// <returns>A flat array holding one piece per small file and one piece per
        /// fragment of each large file, laid out so that equal-sized consecutive
        /// slices of the array have (approximately) equal total byte weight.</returns>
        public static IList <FileContentsPiece> CreateFilePieces(ICollection <FileWithContents> files)
        {
            var filesWithContents = FilterFilesWithContents(files);

            // Factory for file identifiers: every piece receives a unique sequential id.
            var        currentFileId = 0;
            Func <int> fileIdFactory = () => currentFileId++;

            // Predicate to figure out if a file is "small", i.e. fits in a single chunk.
            Func <FileWithContents, bool> isSmallFile = x => x.Contents.ByteLength <= ChunkSize;

            // First pass: count the small files and split the large files into their
            // fragments. Knowing the total piece count up front lets us allocate the
            // result array exactly once (see the allocation note in the summary).
            var smallFilesCount = 0;
            // Heuristic capacity: large files are expected to be a small fraction
            // (~1%) of the input set — TODO(review): confirm against real data.
            var largeFiles      = new List <FileContentsPiece>(filesWithContents.Count / 100);

            foreach (var fileData in filesWithContents)
            {
                if (isSmallFile(fileData))
                {
                    smallFilesCount++;
                }
                else
                {
                    var splitFileContents = SplitFileContents(fileData, fileIdFactory());
                    largeFiles.AddRange(splitFileContents);
                }
            }

            var totalFileCount = smallFilesCount + largeFiles.Count;

            // Second pass: scatter the pieces into their partitions.
            // # of partitions = # of logical processors.
            var filePieces     = new FileContentsPiece[totalFileCount];
            var partitionCount = Environment.ProcessorCount;
            var generator      = new PartitionIndicesGenerator(totalFileCount, partitionCount);

            // Store large-file fragments first, letting the generator spread their
            // weight across all partitions.
            foreach (var item in largeFiles)
            {
                filePieces[generator.Next()] = item;
            }

            // Then store the small files, one piece each.
            foreach (var fileData in filesWithContents)
            {
                if (isSmallFile(fileData))
                {
                    var item = fileData.Contents.CreatePiece(
                        fileData.FileName,
                        fileIdFactory(),
                        fileData.Contents.TextRange);
                    filePieces[generator.Next()] = item;
                }
            }

            FileDatabaseDebugLogger.LogFilePieces(filesWithContents, filePieces, partitionCount);
            return filePieces;
        }
Example #2
0
        /// <summary>
        /// Partitioning evenly ensures that each processor used by PLinq will deal
        /// with a partition of equal "weight". In this case, we make sure each
        /// partition contains not only the same amount of files, but also (as close
        /// to as possible) the same amount of "bytes". For example, if we have 100
        /// files totaling 32MB and 4 processors, we will end up with 4 partitions
        /// of (exactly) 25 files totalling (approximately) 8MB each.
        ///
        /// Note: The code inside this method is not the cleanest, but it is
        /// written in a way that tries to minimize the # of large array allocations.
        /// </summary>
        /// <param name="filesWithContents">Files to partition; every entry is
        /// expected to have non-null <c>Contents</c>.</param>
        /// <returns>A flat array holding one piece per small file and one piece per
        /// fragment of each large file, laid out so that equal-sized consecutive
        /// slices of the array have (approximately) equal total byte weight.</returns>
        private static IList <IFileContentsPiece> CreateFilePieces(ICollection <FileData> filesWithContents)
        {
            // Factory for file identifiers: every piece receives a unique sequential id.
            int        currentFileId = 0;
            Func <int> fileIdFactory = () => currentFileId++;

            // Predicate to figure out if a file is "small", i.e. fits in a single chunk.
            Func <FileData, bool> isSmallFile = x => x.Contents.ByteLength <= ChunkSize;

            // First pass: count the small files and split the large files into their
            // fragments. Knowing the total piece count up front lets us allocate the
            // result array exactly once (see the allocation note in the summary).
            var smallFilesCount = 0;
            // Heuristic capacity: large files are expected to be a small fraction
            // (~1%) of the input set — TODO(review): confirm against real data.
            var largeFiles      = new List <FileContentsPiece>(filesWithContents.Count / 100);

            foreach (var fileData in filesWithContents)
            {
                if (isSmallFile(fileData))
                {
                    smallFilesCount++;
                }
                else
                {
                    var splitFileContents = SplitFileContents(fileData, fileIdFactory());
                    largeFiles.AddRange(splitFileContents);
                }
            }
            var totalFileCount = smallFilesCount + largeFiles.Count;

            // Second pass: scatter the pieces into their partitions.
            // # of partitions = # of logical processors.
            var fileContents   = new FileContentsPiece[totalFileCount];
            var partitionCount = Environment.ProcessorCount;
            var generator      = new PartitionIndicesGenerator(
                totalFileCount,
                partitionCount);

            // Store large-file fragments first, letting the generator spread their
            // weight across all partitions.
            foreach (var item in largeFiles)
            {
                fileContents[generator.Next()] = item;
            }
            // Then store the small files, one piece each.
            foreach (var fileData in filesWithContents)
            {
                if (isSmallFile(fileData))
                {
                    var item = fileData.Contents.CreatePiece(
                        fileData.FileName,
                        fileIdFactory(),
                        fileData.Contents.TextRange);
                    fileContents[generator.Next()] = item;
                }
            }

            if (LogPiecesStats)
            {
                // Sanity checks: every slot was filled, and no bytes were lost or
                // duplicated while splitting files into pieces.
                Debug.Assert(fileContents.All(x => x != null));
                Debug.Assert(fileContents.Aggregate(0L, (c, x) => c + x.ByteLength) ==
                             filesWithContents.Aggregate(0L, (c, x) => c + x.Contents.ByteLength));
                // Log the byte weight of each partition so the balancing can be eyeballed.
                fileContents.GetPartitionRanges(partitionCount).ForAll(
                    (index, range) => {
                        Logger.LogInfo("Partition {0} has a weight of {1:n0}",
                                       index,
                                       fileContents
                                           .Skip(range.Key)
                                           .Take(range.Value)
                                           .Aggregate(0L, (c, x) => c + x.ByteLength));
                    });
            }
            return fileContents;
        }