public async Task EncodingStatisticsTest2()
        {
            // Sizes of the two zero-filled input files served by the fake platform.
            const int firstFileSize  = 200 * 1024;
            const int secondFileSize = 300 * 1024;

            // Everything the service writes ends up in this in-memory stream.
            WritableMemoryStream encodedOutput = new WritableMemoryStream();

            platform.WriteFileFunc = _ => encodedOutput;

            // The enumerated tree: one directory entry followed by its two files.
            fileSystem.EnumFileSystemEntriesFunc = _ => new[] {
                new FileSystemEntry(FileSystemEntryType.Directory, "dir", @"C:\dir\"),
                new FileSystemEntry(FileSystemEntryType.File, "f1.dat", @"C:\dir\f1.dat"),
                new FileSystemEntry(FileSystemEntryType.File, "f2.dat", @"C:\dir\f2.dat"),
            };

            platform.ReadFileFunc = requestedPath => {
                if (requestedPath == @"C:\dir\f1.dat")
                {
                    return(new MemoryStream(new byte[firstFileSize]));
                }
                if (requestedPath == @"C:\dir\f2.dat")
                {
                    return(new MemoryStream(new byte[secondFileSize]));
                }
                // Any other path is not expected by this test.
                throw new NotImplementedException();
            };

            EncodingStatistics result = await service.EncodeAsync(@"C:\dir\", @"C:\outputFile.dat", CancellationToken.None, null);

            Assert.IsNotNull(result);
            // The input size is the sum of both file sizes.
            Assert.AreEqual(firstFileSize + secondFileSize, result.InputSize);
            Assert.AreEqual(64095, result.OutputSize);
        }
Пример #2
0
        /// <summary>
        /// Obtains a target path from <c>FileSelectorService.GetSaveFile</c> and, when one is
        /// returned, encodes <c>Path</c> into it while keeping <c>Status</c> up to date.
        /// </summary>
        private async Task Encode()
        {
            string destination = FileSelectorService.GetSaveFile(DefaultArchiveExtension);

            // A null or empty path means there is nothing to encode into.
            if (string.IsNullOrEmpty(destination))
            {
                return;
            }

            Status = ViewModelStatus.Encoding;
            cts    = new CancellationTokenSource();
            try
            {
                var progressHandler = new DefaultProgressHandler(this);
                EncodingStatistics result = await EncodingService.EncodeAsync(Path, destination, cts.Token, progressHandler);

                EncodingResult = new EncodingResultViewModel(result);
                Status         = ViewModelStatus.WaitForCommand | ViewModelStatus.EncodingFinished;
            }
            catch (OperationCanceledException)
            {
                // Cancellation is reported through the status fields and is not rethrown.
                StatusMessage = "Encoding Cancelled";
                Status        = ViewModelStatus.Cancelled;
            }
            catch
            {
                // Any other failure marks the view model as faulted and propagates to the caller.
                Status = ViewModelStatus.Error;
                throw;
            }
            finally
            {
                cts.Dispose();
            }
        }
Пример #3
0
        /// <summary>
        /// Chooses, among <c>knownRussianEncodings</c>, the single-byte encoding that best explains
        /// the given byte frequency table.
        /// </summary>
        /// <param name="frequencies">Byte frequency table to analyse.</param>
        /// <param name="checkControlBytesPresence">When true, detection is abandoned (null is returned)
        /// as soon as <c>HasControlBytes</c> reports control bytes in the table; when false the
        /// detection is attempted regardless.</param>
        /// <returns>The selected encoding, or null when no candidate can be chosen.</returns>
        private static Encoding DetectSingleByteRussianEncoding(int[] frequencies, bool checkControlBytesPresence)
        {
            // Thresholds applied to distances from the reference Russian distribution.
            const double clearlyWorseGap     = 0.03;
            const double confidentDistance   = 0.008;
            const double nearlyEquivalentGap = 0.01;

            if (checkControlBytesPresence && HasControlBytes(frequencies))
            {
                return(null);
            }

            EncodingStatistics closestCandidate = null;
            var smallestDistance = double.MaxValue;

            EncodingStatistics richestCandidate = null;
            var largestCharCount = int.MinValue;

            foreach (var candidateInfo in knownRussianEncodings)
            {
                var candidateFrequencies = MapBytesFrequenciesToCharsFrequencies(frequencies, candidateInfo.BytesToCharMap);
                var decodedCharCount     = candidateFrequencies.Sum();
                var candidateDistance    = DistanceToRussianDistribution(BuildDistributionFromFrequencies(candidateFrequencies));

                var candidate = new EncodingStatistics
                {
                    DistributionDistance = candidateDistance,
                    TotalCharCount       = decodedCharCount,
                    EncodingInfo         = candidateInfo
                };

                // Strict comparisons: on a tie the earlier candidate is kept.
                if (candidateDistance < smallestDistance)
                {
                    smallestDistance = candidateDistance;
                    closestCandidate = candidate;
                }
                if (decodedCharCount > largestCharCount)
                {
                    largestCharCount = decodedCharCount;
                    richestCandidate = candidate;
                }
            }

            // Not expected in practice, since at least one encoding is known.
            if ((closestCandidate == null) || (richestCandidate == null))
            {
                return(null);
            }

            // How much farther the candidate with the most characters is from the reference distribution.
            var distanceGap = richestCandidate.DistributionDistance - closestCandidate.DistributionDistance;

            bool closestWins = (richestCandidate == closestCandidate) ||
                               (distanceGap > clearlyWorseGap) ||
                               (closestCandidate.DistributionDistance < confidentDistance);
            if (closestWins)
            {
                return(closestCandidate.EncodingInfo.Encoding);
            }

            // When both candidates are almost equally close to the reference distribution,
            // the one that yields more characters is used instead of the closest one.
            if (distanceGap < nearlyEquivalentGap)
            {
                return(richestCandidate.EncodingInfo.Encoding);
            }

            // The gap falls between the two thresholds: no decision is made.
            return(null);
        }
        public async Task EncodingStatisticsTest1()
        {
            // Everything the service writes ends up in this in-memory stream.
            WritableMemoryStream encodedOutput = new WritableMemoryStream();

            // The enumerated tree consists of a single file entry.
            fileSystem.EnumFileSystemEntriesFunc = _ => new[] {
                new FileSystemEntry(FileSystemEntryType.File, "file.dat", @"C:\dir\file.dat"),
            };

            platform.WriteFileFunc = _ => encodedOutput;
            // Every read yields an empty stream, whatever path is requested.
            platform.ReadFileFunc  = _ => new MemoryStream(new byte[0]);

            EncodingStatistics result = await service.EncodeAsync(@"C:\dir\", @"C:\outputFile.dat", CancellationToken.None, null);

            Assert.IsNotNull(result);
            Assert.AreEqual(0, result.InputSize);
            Assert.AreEqual(42, result.OutputSize);
        }
Пример #5
0
        /// <summary>
        /// Creates a view model that presents the given encoding statistics as display strings.
        /// The strings are wrapped in <see cref="Lazy{T}"/> and therefore formatted on first access.
        /// </summary>
        /// <param name="encodingStatistics">Statistics to present; checked with <c>Guard.IsNotNull</c>.</param>
        public EncodingResultViewModel(EncodingStatistics encodingStatistics)
        {
            Guard.IsNotNull(encodingStatistics, nameof(encodingStatistics));
            this.statistics = encodingStatistics;

            inputSizeTexLazy = new Lazy <string>(() =>
                                                 string.Format(FileSizeFormatProvider.Instance, "Input Size: {0:FS}", statistics.InputSize));

            outputSizeTexLazy = new Lazy <string>(() =>
                                                  string.Format(FileSizeFormatProvider.Instance, "Output Size: {0:FS}", statistics.OutputSize));

            saveFactorTexLazy = new Lazy <string>(() => {
                if (statistics.OutputSize == 0)
                {
                    return("(infinity)");
                }
                if (statistics.InputSize == 0)
                {
                    // The ratio below divides by InputSize. With an empty input and a non-empty
                    // output the floating-point division yields an infinite value, which would be
                    // rendered as a negative-infinity percentage. Report it as not applicable instead.
                    return("Save Factor: n/a");
                }
                // Percentage of the input size that was saved by encoding.
                double value = (1 - (double)statistics.OutputSize / statistics.InputSize) * 100;
                return($"Save Factor: {value.ToString("F02", CultureInfo.InvariantCulture)}%");
            });
        }