/// <summary>
/// Encodes a directory that lists one sub-directory entry and two zero-filled files
/// (200 KiB and 300 KiB) and checks the sizes reported in the returned statistics.
/// </summary>
public async Task EncodingStatisticsTest2()
{
    const string firstFilePath = @"C:\dir\f1.dat";
    const string secondFilePath = @"C:\dir\f2.dat";
    const int firstFileSize = 200 * 1024;
    const int secondFileSize = 300 * 1024;

    // Directory listing handed to the service: the directory itself plus both files.
    fileSystem.EnumFileSystemEntriesFunc = _ => new[]
    {
        new FileSystemEntry(FileSystemEntryType.Directory, "dir", @"C:\dir\"),
        new FileSystemEntry(FileSystemEntryType.File, "f1.dat", firstFilePath),
        new FileSystemEntry(FileSystemEntryType.File, "f2.dat", secondFilePath),
    };

    // Every known file is served as a fresh zero-filled stream; any other path is a test failure.
    platform.ReadFileFunc = requestedPath =>
    {
        int length;
        switch (requestedPath)
        {
            case firstFilePath:
                length = firstFileSize;
                break;
            case secondFilePath:
                length = secondFileSize;
                break;
            default:
                throw new NotImplementedException();
        }

        return new MemoryStream(new byte[length]);
    };

    // All output written by the service lands in this in-memory stream.
    WritableMemoryStream archiveStream = new WritableMemoryStream();
    platform.WriteFileFunc = _ => archiveStream;

    EncodingStatistics encoded = await service.EncodeAsync(@"C:\dir\", @"C:\outputFile.dat", CancellationToken.None, null);

    Assert.IsNotNull(encoded);
    Assert.AreEqual(firstFileSize + secondFileSize, encoded.InputSize);
    Assert.AreEqual(64095, encoded.OutputSize);
}
/// <summary>
/// Asks the user for a destination file, encodes <c>Path</c> into it and reflects the outcome
/// in <c>Status</c>, <c>StatusMessage</c> and <c>EncodingResult</c>.
/// </summary>
/// <remarks>
/// Returns without touching any state when no destination is chosen. Cancellation is reported through
/// the status properties and swallowed; any other failure sets the error status and is rethrown.
/// </remarks>
private async Task Encode()
{
    string destination = FileSelectorService.GetSaveFile(DefaultArchiveExtension);
    if (string.IsNullOrEmpty(destination))
    {
        // Nothing selected - leave the view model exactly as it was.
        return;
    }

    Status = ViewModelStatus.Encoding;
    cts = new CancellationTokenSource();

    try
    {
        // Arguments are captured in the same left-to-right order the call would evaluate them.
        var source = Path;
        var cancellation = cts.Token;
        var progress = new DefaultProgressHandler(this);

        EncodingStatistics outcome = await EncodingService.EncodeAsync(source, destination, cancellation, progress);

        EncodingResult = new EncodingResultViewModel(outcome);
        Status = ViewModelStatus.WaitForCommand | ViewModelStatus.EncodingFinished;
    }
    catch (OperationCanceledException)
    {
        // Cancellation is an expected outcome: report it and do not propagate.
        StatusMessage = "Encoding Cancelled";
        Status = ViewModelStatus.Cancelled;
    }
    catch
    {
        // Flag the failure for the UI, then let the caller see the original exception.
        Status = ViewModelStatus.Error;
        throw;
    }
    finally
    {
        cts.Dispose();
    }
}
/// <summary>
/// Tries to decide which of the known single-byte Russian encodings a text uses,
/// based on a table of byte frequencies.
/// </summary>
/// <param name="frequencies">Table of byte frequencies to analyse.</param>
/// <param name="checkControlBytesPresence">When true, detection is refused (null is returned) if
/// <c>HasControlBytes</c> reports control bytes in the table; when false that check is skipped
/// and detection is attempted regardless.</param>
/// <returns>The selected encoding, or null when detection is refused or the candidates
/// are too ambiguous to choose between.</returns>
private static Encoding DetectSingleByteRussianEncoding(int[] frequencies, bool checkControlBytesPresence)
{
    // Gap above which the closest candidate wins outright.
    const double decisiveGap = 0.03;
    // Distance below which the closest candidate wins outright.
    const double confidentDistance = 0.008;
    // Gap below which the candidate decoding more characters is preferred.
    const double negligibleGap = 0.01;

    if (checkControlBytesPresence && HasControlBytes(frequencies))
    {
        return null;
    }

    EncodingStatistics closest = null;
    EncodingStatistics richest = null;
    double smallestDistance = double.MaxValue;
    int largestCharCount = int.MinValue;

    // Score every known encoding twice: by distance to the reference Russian
    // distribution and by the number of characters it manages to decode.
    foreach (var candidate in knownRussianEncodings)
    {
        var charFrequencies = MapBytesFrequenciesToCharsFrequencies(frequencies, candidate.BytesToCharMap);
        var decodedCharCount = charFrequencies.Sum();
        var candidateDistance = DistanceToRussianDistribution(BuildDistributionFromFrequencies(charFrequencies));

        var score = new EncodingStatistics
        {
            DistributionDistance = candidateDistance,
            TotalCharCount = decodedCharCount,
            EncodingInfo = candidate
        };

        if (candidateDistance < smallestDistance)
        {
            smallestDistance = candidateDistance;
            closest = score;
        }

        if (decodedCharCount > largestCharCount)
        {
            largestCharCount = decodedCharCount;
            richest = score;
        }
    }

    if (closest == null || richest == null)
    {
        // Not expected in practice: there is at least one known encoding.
        return null;
    }

    if (richest == closest)
    {
        // Both criteria agree on the same candidate.
        return closest.EncodingInfo.Encoding;
    }

    var gap = richest.DistributionDistance - closest.DistributionDistance;
    if (gap > decisiveGap || closest.DistributionDistance < confidentDistance)
    {
        return closest.EncodingInfo.Encoding;
    }

    // A candidate that decodes more characters while being almost as close to the
    // reference distribution is preferred over the one that is merely closest.
    // Anything in between is treated as undecidable.
    return gap < negligibleGap ? richest.EncodingInfo.Encoding : null;
}
/// <summary>
/// Encodes a directory holding a single empty file and checks the reported sizes:
/// zero bytes of input and 42 bytes of output.
/// </summary>
public async Task EncodingStatisticsTest1()
{
    // All output written by the service lands in this in-memory stream.
    WritableMemoryStream archiveStream = new WritableMemoryStream();
    platform.WriteFileFunc = _ => archiveStream;

    // Whatever file is requested, it is served as an empty stream.
    platform.ReadFileFunc = _ => new MemoryStream(new byte[0]);

    fileSystem.EnumFileSystemEntriesFunc = _ => new[]
    {
        new FileSystemEntry(FileSystemEntryType.File, "file.dat", @"C:\dir\file.dat"),
    };

    EncodingStatistics encoded = await service.EncodeAsync(@"C:\dir\", @"C:\outputFile.dat", CancellationToken.None, null);

    Assert.IsNotNull(encoded);
    Assert.AreEqual(0, encoded.InputSize);
    Assert.AreEqual(42, encoded.OutputSize);
}
/// <summary>
/// Creates a view model over the given encoding statistics. The three display texts
/// (input size, output size, save factor) are built lazily, on first access.
/// </summary>
/// <param name="encodingStatistics">Statistics to present; validated through <c>Guard.IsNotNull</c>.</param>
public EncodingResultViewModel(EncodingStatistics encodingStatistics)
{
    Guard.IsNotNull(encodingStatistics, nameof(encodingStatistics));
    this.statistics = encodingStatistics;

    inputSizeTexLazy = new Lazy<string>(
        () => string.Format(FileSizeFormatProvider.Instance, "Input Size: {0:FS}", statistics.InputSize));

    outputSizeTexLazy = new Lazy<string>(
        () => string.Format(FileSizeFormatProvider.Instance, "Output Size: {0:FS}", statistics.OutputSize));

    saveFactorTexLazy = new Lazy<string>(() =>
    {
        // The ratio below divides by InputSize. Without the InputSize check an empty input
        // with a non-empty output would produce a floating-point division by zero and the
        // text "Save Factor: -Infinity%". The OutputSize check is kept so the existing
        // placeholder for an empty output is unchanged.
        if (statistics.OutputSize == 0 || statistics.InputSize == 0)
        {
            return "(infinity)";
        }

        // Percentage of the input size that was saved by encoding.
        double value = (1 - (double)statistics.OutputSize / statistics.InputSize) * 100;
        return $"Save Factor: {value.ToString("F02", CultureInfo.InvariantCulture)}%";
    });
}