Пример #1
0
        // Formats the test file "EncodingTests/<fileName>.cst", writes the result to
        // "<fileName>.actual.cst" using the encoding detected for the input, and asserts
        // that the written file is detected as having that same encoding.
        public void RunTest(string fileName)
        {
            var filePath = Path.Combine(
                this.rootDirectory.FullName,
                "EncodingTests",
                fileName + ".cst"
                );

            string code;
            using (var reader = new StreamReader(filePath, Encoding.UTF8, true))
            {
                code = reader.ReadToEnd();
            }

            var encoding = CharsetDetector.DetectFromFile(filePath).Detected.Encoding;

            var formatter = new CodeFormatter();
            var result    = formatter.Format(code, new Options());

            var actualFilePath = filePath.Replace(".cst", ".actual.cst");

            // The writer has to be flushed and closed before the output is inspected;
            // otherwise the detector would see an incomplete (or locked) file.
            using (var stream = File.Open(actualFilePath, FileMode.Create))
            using (var writer = new StreamWriter(stream, encoding))
            {
                writer.Write(result.Code);
            }

            // Detect from the file that was just written, not from the input file,
            // so the assertion actually exercises the round trip.
            var actualEncoding = CharsetDetector.DetectFromFile(actualFilePath).Detected.Encoding;

            actualEncoding.Should().Be(encoding);
        }
Пример #2
0
        /// <summary>
        /// Parses an XML subtitle file whose cues are <c>text</c> elements with
        /// <c>start</c> and <c>dur</c> attributes.
        /// </summary>
        /// <param name="path">Path of the subtitle file.</param>
        /// <param name="result">
        /// The parsed items after duplicate removal, or <c>null</c> when parsing fails
        /// or no item is found.
        /// </param>
        /// <returns><c>true</c> when at least one item was parsed; otherwise <c>false</c>.</returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            result = null;

            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect = CharsetDetector.DetectFromFile(path);
            if (detect.Detected == null)
            {
                // The charset could not be detected; report failure instead of
                // dereferencing a null detection result.
                return false;
            }

            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);
            var xmlDoc   = new XmlDocument();

            // Dispose the reader (and the underlying file handle) as soon as the
            // document has been loaded.
            using (var streamReader = new StreamReader(path, encoding))
            {
                var xmlStream = streamReader.BaseStream;

                // rewind the stream
                xmlStream.Position = 0;
                xmlDoc.Load(xmlStream);
            }

            var items    = new List <SubtitleItem>();
            var nodeList = xmlDoc.DocumentElement?.SelectNodes("//text");

            if (nodeList != null)
            {
                foreach (XmlNode node in nodeList)
                {
                    try
                    {
                        var start    = float.Parse(node.Attributes["start"].Value, CultureInfo.InvariantCulture);
                        var duration = float.Parse(node.Attributes["dur"].Value, CultureInfo.InvariantCulture);

                        items.Add(new SubtitleItem
                        {
                            // Attribute values are scaled by 1000 before being stored.
                            StartTime = (int)(start * 1000),
                            EndTime   = (int)((start + duration) * 1000),
                            Text      = ConvertString(node.InnerText)
                        });
                    }
                    catch
                    {
                        // Deliberate best effort: any malformed cue (missing attribute,
                        // unparsable number, ...) makes the whole file count as unparsable.
                        return false;
                    }
                }
            }

            if (!items.Any())
            {
                return false;
            }

            result = Filters.RemoveDuplicateItems(items);
            return true;
        }
Пример #3
0
        /// <summary>
        /// Command line example: prints the detected encoding of a file to the console.
        /// </summary>
        /// <param name="filename">Path of the file to inspect.</param>
        public static void DetectDemo(string filename)
        {
            // Best match of the detection; null when nothing could be detected.
            DetectionDetail best = CharsetDetector.DetectFromFile(filename).Detected;

            if (best == null)
            {
                Console.WriteLine($"Detection failed: {filename}");
                return;
            }

            Console.WriteLine($"Detection completed: {filename}");

            // The System.Text.Encoding may be unavailable even though a charset name was found.
            Encoding systemEncoding = best.Encoding;
            string details = systemEncoding != null
                ? $"EncodingWebName: {systemEncoding.WebName}{Environment.NewLine}Confidence: {best.Confidence}"
                : $"(Encoding is null){Environment.NewLine}EncodingName: {best.EncodingName}{Environment.NewLine}Confidence: {best.Confidence}";

            Console.WriteLine(details);
        }
Пример #4
0
        /// <summary>
        /// Asserts that the charset detected for <paramref name="file"/> matches
        /// <paramref name="expectedCharset"/> (ignoring case) and that a
        /// <c>System.Text.Encoding</c> is available for it.
        /// </summary>
        /// <param name="expectedCharset">Expected charset name.</param>
        /// <param name="file">Path of the file to run detection on.</param>
        private static void TestFile(string expectedCharset, string file)
        {
            var detected = CharsetDetector.DetectFromFile(file).Detected;

            var failureMessage =
                $"Charset detection failed for {file}. Expected: {expectedCharset}, detected: {detected.EncodingName} ({detected.Confidence * 100}% confidence)";

            StringAssert.AreEqualIgnoringCase(expectedCharset, detected.EncodingName, failureMessage);
            Assert.NotNull(detected.Encoding);
        }
        /// <summary>
        /// Logs the full detection result for <paramref name="file"/> as indented JSON, then
        /// asserts that the detected charset matches <paramref name="expectedCharset"/>
        /// (ignoring case) and that a <c>System.Text.Encoding</c> is available for it.
        /// </summary>
        /// <param name="expectedCharset">Expected charset name.</param>
        /// <param name="file">Path of the file to run detection on.</param>
        private void TestFile(string expectedCharset, string file)
        {
            var detection = CharsetDetector.DetectFromFile(file);
            var best      = detection.Detected;

            var serialized = JsonConvert.SerializeObject(detection, Formatting.Indented, new EncodingJsonConverter());
            _logWriter.WriteLine($"- {file} ({expectedCharset}) -> {serialized}");

            var failureMessage =
                $"Charset detection failed for {file}. Expected: {expectedCharset}, detected: {best.EncodingName} ({best.Confidence * 100.0f:0.00############}% confidence)";

            StringAssert.AreEqualIgnoringCase(expectedCharset, best.EncodingName, failureMessage);
            Assert.NotNull(best.Encoding);
        }
Пример #6
0
        /// <summary>
        /// Parses an XML subtitle file whose cues are <c>p</c> elements (in the <c>tt</c>
        /// namespace, or the default namespace when no <c>tt</c> prefix is declared) with
        /// <c>begin</c> and <c>end</c> attributes.
        /// </summary>
        /// <param name="path">Path of the subtitle file.</param>
        /// <param name="result">
        /// The parsed items after duplicate removal, or <c>null</c> when parsing fails
        /// or no item is found.
        /// </param>
        /// <returns><c>true</c> when at least one item was parsed; otherwise <c>false</c>.</returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            result = null;

            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect = CharsetDetector.DetectFromFile(path);
            if (detect.Detected == null)
            {
                // The charset could not be detected; report failure instead of
                // dereferencing a null detection result.
                return false;
            }

            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

            // Dispose the reader (and the underlying file handle) as soon as the
            // document has been loaded.
            XElement xElement;
            using (var streamReader = new StreamReader(path, encoding))
            {
                var xmlStream = streamReader.BaseStream;
                xmlStream.Position = 0;
                xElement = XElement.Load(xmlStream);
            }

            var tt    = xElement.GetNamespaceOfPrefix("tt") ?? xElement.GetDefaultNamespace();
            var items = new List <SubtitleItem>();

            foreach (var node in xElement.Descendants(tt + "p").ToList())
            {
                try
                {
                    using (var reader = node.CreateReader())
                    {
                        reader.MoveToContent();

                        // The letter "t" is stripped from the attribute values before parsing.
                        var beginString = node.Attribute("begin").Value.Replace("t", "");
                        var startTicks  = ParseTimecode(beginString);
                        var endString   = node.Attribute("end").Value.Replace("t", "");
                        var endTicks    = ParseTimecode(endString);

                        // Keep the inner markup but drop the tt: prefix and the namespace
                        // declarations that ReadInnerXml adds to child elements.
                        var text = reader.ReadInnerXml()
                                   .Replace("<tt:", "<")
                                   .Replace("</tt:", "</")
                                   .Replace(string.Format(@" xmlns:tt=""{0}""", tt), "")
                                   .Replace(string.Format(@" xmlns=""{0}""", tt), "");

                        items.Add(new SubtitleItem
                        {
                            StartTime = (int)startTicks,
                            EndTime   = (int)endTicks,
                            Text      = ConvertString(text)
                        });
                    }
                }
                catch
                {
                    // Deliberate best effort: any malformed cue makes the whole file
                    // count as unparsable.
                    return false;
                }
            }

            if (!items.Any())
            {
                return false;
            }

            result = Filters.RemoveDuplicateItems(items);
            return true;
        }
Пример #7
0
        /// <summary>
        /// Detects the encoding of the file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the file to inspect.</param>
        /// <returns>
        /// The encoding of the best detection; when there is no best detection, the encoding of
        /// the candidate ranked first by "has an encoding" and then by confidence; and
        /// <see cref="Encoding.Default"/> when no encoding is available at all.
        /// </returns>
        public static Encoding GetEncoding(this string path)
        {
            var detection = CharsetDetector.DetectFromFile(path);

            var best = detection.Detected;
            if (best == null)
            {
                // Prefer candidates that map to a System.Text.Encoding, then the most confident one.
                best = detection.Details
                       .OrderByDescending(candidate => candidate.Encoding != null)
                       .ThenByDescending(candidate => candidate.Confidence)
                       .FirstOrDefault();
            }

            return best?.Encoding ?? Encoding.Default;
        }
Пример #8
0
        /// <summary>
        /// Runs charset detection for <c>Path</c> on a thread-pool thread and, when an encoding
        /// is available, stores its name and instance and enables conversion.
        /// </summary>
        public async Task DetectAsync()
        {
            var detection = await Task.Run(() => CharsetDetector.DetectFromFile(this.Path));

            // Nothing is updated when detection failed or no System.Text.Encoding is available.
            if (detection.Detected?.Encoding == null)
            {
                return;
            }

            this.EncodingName     = detection.Detected.EncodingName;
            this.Encoding         = detection.Detected.Encoding;
            this.IsEnabledConvert = true;
        }
        // Logs the full detection result for the test case's input file as indented JSON and
        // asserts that the detected charset name matches the expected one (ignoring case).
        // Unlike the other charset tests, no System.Text.Encoding instance is required.
        public void TestFileUnsupportedEncodings(TestCase testCase)
        {
            var detection = CharsetDetector.DetectFromFile(testCase.InputFile.FullName);
            var best      = detection.Detected;

            var logHeader  = $"- {testCase.InputFile.FullName} ({testCase.ExpectedEncoding}) -> ";
            var logPayload = $"{JsonConvert.SerializeObject(detection, Formatting.Indented, new EncodingJsonConverter())}";
            _logWriter.WriteLine(logHeader + logPayload);

            var failureMessage =
                $"Charset detection failed for {testCase.InputFile.FullName}. " +
                $"Expected: {testCase.ExpectedEncoding}. " +
                $"Detected: {best.EncodingName} " +
                $"({best.Confidence * 100.0f:0.00############}% confidence).";

            StringAssert.AreEqualIgnoringCase(testCase.ExpectedEncoding, best.EncodingName, failureMessage);
        }
Пример #10
0
        /// <summary>
        /// Runs charset detection on every file directly inside <c>DATA_ROOT/dirname</c> and
        /// asserts that each one is detected as <paramref name="charset"/> with an available
        /// <c>System.Text.Encoding</c>. Does nothing when the directory does not exist.
        /// </summary>
        /// <param name="charset">Expected charset name (compared case-sensitively).</param>
        /// <param name="dirname">Directory name relative to <c>DATA_ROOT</c>.</param>
        private static void Process(string charset, string dirname)
        {
            string directory = Path.Combine(DATA_ROOT, dirname);

            if (Directory.Exists(directory))
            {
                foreach (string file in Directory.GetFiles(directory))
                {
                    var detected = CharsetDetector.DetectFromFile(file).Detected;

                    bool matches = charset == detected.EncodingName;
                    string failureMessage = string.Format("Charset detection failed for {0}. Expected: {1}, detected: {2} ({3}% confidence)", file, charset, detected.EncodingName, detected.Confidence * 100);

                    Assert.True(matches, failureMessage);
                    Assert.NotNull(detected.Encoding);
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Determines the encoding of a file, falling back to a Chinese code page when
        /// detection fails or is not confident enough.
        /// </summary>
        /// <param name="fileName">Path of the file to inspect.</param>
        /// <returns>
        /// The detected encoding when a detection with confidence of at least 0.7 exists
        /// (this value is null if the detection has no encoding instance); otherwise
        /// GB18030, or GB2312 when GB18030 cannot be obtained.
        /// </returns>
        public static Encoding GetEncodingType(string fileName)
        {
            DetectionResult result         = CharsetDetector.DetectFromFile(fileName);
            DetectionDetail resultDetected = result.Detected;

            // A missing detection is treated like a low-confidence one, so it takes the
            // fallback path instead of failing with a NullReferenceException.
            if (resultDetected == null || resultDetected.Confidence < 0.7)
            {
                try
                {
                    return Encoding.GetEncoding("GB18030");
                }
                catch
                {
                    // Deliberate best effort: if GB18030 cannot be obtained, use GB2312.
                    return Encoding.GetEncoding("GB2312");
                }
            }

            return resultDetected.Encoding;
        }
Пример #12
0
        /// <summary>
        /// Builds a <c>FileInfoDto</c> describing the file or directory at <paramref name="fullpath"/>.
        /// </summary>
        /// <param name="fullpath">Full path of the file or directory.</param>
        /// <param name="withContent">
        /// When <c>true</c> and the path is a file, the file's text is read into <c>Content</c>
        /// using the detected encoding; if no usable encoding is detected, <c>Content</c> is set
        /// to a placeholder message instead.
        /// </param>
        /// <returns>The populated DTO.</returns>
        private async Task <FileInfoDto> GetFileInfo(string fullpath, bool withContent = true)
        {
            var attributes = File.GetAttributes(fullpath);

            FileInfoDto fileInfoDto = new FileInfoDto {
                Size = 0, FullPath = fullpath
            };

            if ((attributes & FileAttributes.Directory) == FileAttributes.Directory)
            {
                fileInfoDto.Dir  = true;
                fileInfoDto.Name = Path.GetFileName(fullpath);
                fileInfoDto.Path = Path.GetRelativePath(basePath, Directory.GetParent(fullpath).FullName);
                return fileInfoDto;
            }

            FileInfo fileInfo = new FileInfo(fullpath);
            fileInfoDto.Name        = fileInfo.Name;
            fileInfoDto.Path        = Path.GetRelativePath(basePath, Directory.GetParent(fullpath).FullName);
            fileInfoDto.Size        = fileInfo.Length;
            fileInfoDto.UpdateTime  = fileInfo.LastWriteTime;
            fileInfoDto.Url         = new Uri(fileInfo.FullName).AbsoluteUri;
            fileInfoDto.ContentType = GetMimeType(fileInfo.FullName);

            if (withContent)
            {
                // Both the detection and its Encoding can be missing; passing a null encoding
                // to ReadAllTextAsync would throw, so both cases get the placeholder.
                var encoding = CharsetDetector.DetectFromFile(fileInfo).Detected?.Encoding;

                fileInfoDto.Content = encoding != null
                    ? await File.ReadAllTextAsync(fileInfo.FullName, encoding)
                    : "not support Blob file";
            }

            return fileInfoDto;
        }
Пример #13
0
        /// <summary>
        /// Command line example: prints the detected charset and confidence of a file.
        /// </summary>
        /// <param name="args">Command line arguments; the first one is the file name.</param>
        public static void Main(String[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("Usage: udetect <filename>");
                return;
            }

            var detected = CharsetDetector.DetectFromFile(args[0]).Detected;

            if (detected == null)
            {
                Console.WriteLine("Detection failed.");
                return;
            }

            Console.WriteLine("Charset: {0}, confidence: {1}", detected.EncodingName, detected.Confidence);
        }
Пример #14
0
        /// <summary>
        /// Command line example: detects the encoding of the given file and prints it
        /// together with the detection confidence.
        /// </summary>
        /// <param name="args">Command line arguments; the first one is the file name.</param>
        public static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("Usage: ConsoleExample <filename>");
                return;
            }

            var filename = args[0];

            if (!File.Exists(filename))
            {
                Console.WriteLine($"File not found: {filename}");
                return;
            }

            var detected = CharsetDetector.DetectFromFile(filename).Detected;

            string message;
            if (detected == null)
            {
                message = $"Detection failed: {filename}";
            }
            else
            {
                // A charset can be detected without a matching System.Text.Encoding being
                // available; fall back to the detected charset name in that case.
                var encodingLabel = detected.Encoding?.WebName ?? detected.EncodingName;
                message = $"Detected encoding {encodingLabel} with confidence {detected.Confidence}.";
            }

            Console.WriteLine(message);
        }
Пример #15
0
        // Scans the workspaces listed in BasePath\package.json and fills the fixture state:
        //   - Workspaces, TranslationFiles, JavaScriptFiles, NotTranslatedToasts,
        //     ModuleFolders and CommonTranslations with what was found on disk;
        //   - WrongEncodingJsonErrors with translation files not detected as utf-8/ascii;
        //   - ParseJsonErrors with translation files that failed to load or parse.
        public void Setup()
        {
            ParseJsonErrors         = new List <ParseJsonError>();
            WrongEncodingJsonErrors = new List <JsonEncodingError>();

            var packageJsonPath = Path.Combine(BasePath, @"package.json");

            var jsonPackage = JObject.Parse(File.ReadAllText(packageJsonPath));

            // Workspace entries are converted to backslash-separated paths; the
            // "asc-web-components" workspace is excluded.
            var moduleWorkspaces = ((JArray)jsonPackage["workspaces"])
                                   .Select(p => ((string)p).Replace("/", "\\"))
                                   .Where(w => !w.Contains("asc-web-components"))
                                   .ToList();

            Workspaces = new List <string>();

            Workspaces.AddRange(moduleWorkspaces);

            // The shared locales folder is scanned in addition to the module workspaces.
            Workspaces.Add("public\\locales");

            // Every *.json file under a workspace whose path contains "public\locales\".
            // The query is deferred; it is enumerated by the foreach below.
            var translationFiles = from wsPath in Workspaces
                                   let clientDir = Path.Combine(BasePath, wsPath)
                                                   from filePath in Directory.EnumerateFiles(clientDir, "*.json", SearchOption.AllDirectories)
                                                   where filePath.Contains("public\\locales\\")
                                                   select Path.GetFullPath(filePath);

            TranslationFiles = new List <TranslationFile>();

            foreach (var path in translationFiles)
            {
                try
                {
                    var result = CharsetDetector.DetectFromFile(path);

                    // Record files whose detected charset is neither utf-8 nor ascii.
                    // NOTE(review): if Detected is null this throws and the file ends up in
                    // ParseJsonErrors via the catch below — confirm that is intended.
                    if (result.Detected.EncodingName != "utf-8" &&
                        result.Detected.EncodingName != "ascii")
                    {
                        WrongEncodingJsonErrors.Add(
                            new JsonEncodingError(path, result.Detected));
                    }

                    using (var md5 = MD5.Create())
                    {
                        using (var stream = File.OpenRead(path))
                        {
                            // MD5 of the raw file bytes as a lowercase hex string without dashes.
                            var hash    = md5.ComputeHash(stream);
                            var md5hash = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();

                            // Rewind so the same stream can be read again as JSON text.
                            stream.Position = 0;

                            // This is a using declaration (note the semicolon); the braces that
                            // follow are an ordinary block, not the body of a using statement.
                            using var sr = new StreamReader(stream, Encoding.UTF8);
                            {
                                var jsonTranslation = JObject.Parse(sr.ReadToEnd());

                                // Each top-level JSON property becomes one TranslationItem.
                                var translationFile = new TranslationFile(path, jsonTranslation.Properties()
                                                                          .Select(p => new TranslationItem(p.Name, (string)p.Value))
                                                                          .ToList(), md5hash);

                                TranslationFiles.Add(translationFile);
                            }
                        }
                    }

                    /*   Re-write by order */

                    //var orderedList = jsonTranslation.Properties().OrderBy(t => t.Name);

                    //var result = new JObject(orderedList);

                    //var sortedJsonString = JsonConvert.SerializeObject(result, Formatting.Indented);

                    //File.WriteAllText(path, sortedJsonString);
                }
                catch (Exception ex)
                {
                    // Any failure for this file is recorded and the scan continues.
                    ParseJsonErrors.Add(new ParseJsonError(path, ex));
                    Debug.WriteLine($"File path = {path} failed to parse with error: {ex.Message}");
                }
            }

            // All *.js files under the workspaces, skipping paths that contain "dist\".
            var javascriptFiles = (from wsPath in Workspaces
                                   let clientDir = Path.Combine(BasePath, wsPath)
                                                   from file in Directory.EnumerateFiles(clientDir, "*.js", SearchOption.AllDirectories)
                                                   where !file.Contains("dist\\")
                                                   select file)
                                  .ToList();

            // Same scan for *.jsx files.
            javascriptFiles.AddRange(from wsPath in Workspaces
                                     let clientDir = Path.Combine(BasePath, wsPath)
                                                     from file in Directory.EnumerateFiles(clientDir, "*.jsx", SearchOption.AllDirectories)
                                                     where !file.Contains("dist\\")
                                                     select file);

            JavaScriptFiles = new List <JavaScriptFile>();

            // pattern1: a t(...) call preceded by '.', '{', whitespace or '(' whose first
            //           argument is a quoted key; the key is the capture group.
            // pattern2: an i18nKey="..." attribute; the key is the capture group.
            var pattern1 = "[.{\\s\\(]t\\(\\s*[\"\'`]([a-zA-Z0-9_.:_\\s{}/_-]+)[\"\'`]\\s*[\\),]";
            var pattern2 = "i18nKey=\"([a-zA-Z0-9_.-]+)\"";

            // In the combined expression group 2 is pattern1's key and group 4 is pattern2's key
            // (groups 1 and 3 are the wrapping parentheses added here).
            var regexp = new Regex($"({pattern1})|({pattern2})", RegexOptions.Multiline | RegexOptions.ECMAScript);

            // Matches the quoted text passed directly to toastr.info/error/success/warn.
            var notTranslatedToastsRegex = new Regex("(?<=toastr.info\\([\"`\'])(.*)(?=[\"\'`])" +
                                                     "|(?<=toastr.error\\([\"`\'])(.*)(?=[\"\'`])" +
                                                     "|(?<=toastr.success\\([\"`\'])(.*)(?=[\"\'`])" +
                                                     "|(?<=toastr.warn\\([\"`\'])(.*)(?=[\"\'`])", RegexOptions.Multiline | RegexOptions.ECMAScript);

            NotTranslatedToasts = new List <KeyValuePair <string, string> >();

            foreach (var path in javascriptFiles)
            {
                var jsFileText = File.ReadAllText(path);

                var toastMatches = notTranslatedToastsRegex.Matches(jsFileText).ToList();

                if (toastMatches.Any())
                {
                    foreach (var toastMatch in toastMatches)
                    {
                        var found = toastMatch.Value;
                        // Each distinct toast text is recorded once, with the path of the
                        // first file it was seen in.
                        if (!string.IsNullOrEmpty(found) && !NotTranslatedToasts.Exists(t => t.Value == found))
                        {
                            NotTranslatedToasts.Add(new KeyValuePair <string, string>(path, found));
                        }
                    }
                }

                var matches = regexp.Matches(jsFileText);

                // Take pattern1's key when it matched, otherwise pattern2's key.
                var translationKeys = matches
                                      .Select(m => m.Groups[2].Value == ""
                        ? m.Groups[4].Value
                        : m.Groups[2].Value)
                                      .ToList();

                // Files without any translation key are not tracked.
                if (!translationKeys.Any())
                {
                    continue;
                }

                var jsFile = new JavaScriptFile(path);

                jsFile.TranslationKeys = translationKeys;

                JavaScriptFiles.Add(jsFile);
            }

            ModuleFolders = new List <ModuleFolder>();

            // Pair every translation file with the first module workspace whose path occurs
            // in the file path (ModulePath is null when none does).
            var list = TranslationFiles
                       .Select(t => new
            {
                ModulePath = moduleWorkspaces.FirstOrDefault(m => t.FilePath.Contains(m)),
                Language   = new LanguageItem
                {
                    Path         = t.FilePath,
                    Language     = t.Language,
                    Translations = t.Translations
                },
                lng = t.Language
            }).ToList();

            // Language items grouped per module workspace.
            var moduleTranslations = list
                                     .GroupBy(t => t.ModulePath)
                                     .Select(g => new
            {
                ModulePath = g.Key,
                Languages  = g.ToList().Select(t => t.Language).ToList()
                             .ToList()
            })
                                     .ToList();

            // Translation keys used by JavaScript files, flattened per module workspace.
            var moduleJsTranslatedFiles = JavaScriptFiles
                                          .Select(t => new
            {
                ModulePath = moduleWorkspaces.FirstOrDefault(m => t.Path.Contains(m)),
                t.Path,
                t.TranslationKeys
            })
                                          .GroupBy(t => t.ModulePath)
                                          .Select(g => new
            {
                ModulePath      = g.Key,
                TranslationKeys = g.ToList().SelectMany(t => t.TranslationKeys).ToList()
            })
                                          .ToList();

            // One ModuleFolder per workspace that has translations, JS keys, or both;
            // the missing side is left null.
            foreach (var ws in moduleWorkspaces)
            {
                var t = moduleTranslations.FirstOrDefault(t => t.ModulePath == ws);
                var j = moduleJsTranslatedFiles.FirstOrDefault(t => t.ModulePath == ws);

                if (j == null && t == null)
                {
                    continue;
                }

                ModuleFolders.Add(new ModuleFolder
                {
                    Path = ws,
                    AvailableLanguages       = t?.Languages,
                    AppliedJsTranslationKeys = j?.TranslationKeys
                });
            }

            // Translation files whose path starts with BasePath followed directly by
            // "public\locales" (no separator is inserted between the two).
            CommonTranslations = TranslationFiles
                                 .Where(file => file.FilePath.StartsWith($"{BasePath}public\\locales"))
                                 .Select(t => new LanguageItem
            {
                Path         = t.FilePath,
                Language     = t.Language,
                Translations = t.Translations
            }).ToList();
        }
Пример #16
0
        /// <summary>
        /// Parses a line-based subtitle file: the first line must equal <c>FirstLine</c>, and
        /// each cue is a timestamp line followed by its text lines.
        /// </summary>
        /// <param name="path">Path of the subtitle file.</param>
        /// <param name="result">
        /// The parsed items after duplicate removal, or <c>null</c> when the file is not in
        /// this format or contains no usable item.
        /// </param>
        /// <returns><c>true</c> when at least one item was parsed; otherwise <c>false</c>.</returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            result = null;

            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect = CharsetDetector.DetectFromFile(path);
            if (detect.Detected == null)
            {
                // The charset could not be detected; report failure instead of
                // dereferencing a null detection result.
                return false;
            }

            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

            // A single reader opened on the path replaces the previous pair of readers, neither
            // of which was disposed. It reads from the start of the file with BOM detection on.
            using (var reader = new StreamReader(path, encoding, true))
            {
                if (reader.ReadLine() != FirstLine)
                {
                    return false;
                }

                // Look for the first timestamp line within the first MaxLineNumberForItems lines.
                var line       = reader.ReadLine();
                var lineNumber = 2;
                while (line != null && lineNumber <= MaxLineNumberForItems && !IsTimestampLine(line))
                {
                    line = reader.ReadLine();
                    lineNumber++;
                }

                if (line == null || lineNumber > MaxLineNumberForItems || !IsTimestampLine(line))
                {
                    return false;
                }

                var items        = new List <SubtitleItem>();
                var timeCodeLine = line;
                var textLines    = new List <string>();

                // Stop at end of file without handing a null line to IsTimestampLine.
                while ((line = reader.ReadLine()) != null)
                {
                    if (IsTimestampLine(line))
                    {
                        // A new timestamp line closes the cue collected so far.
                        var timeCodes = ParseTimecodeLine(timeCodeLine);
                        var start     = timeCodes.Item1;
                        var end       = timeCodes.Item2;

                        if (start > 0 && end > 0 && textLines.Any())
                        {
                            items.Add(new SubtitleItem
                            {
                                StartTime = start,
                                EndTime   = end,
                                Text      = ConvertString(string.Join("\r\n", textLines))
                            });
                        }

                        timeCodeLine = line;
                        textLines    = new List <string>();
                    }
                    else if (!string.IsNullOrEmpty(line))
                    {
                        textLines.Add(line);
                    }
                }

                // Close the cue that was still open when the file ended.
                var lastTimeCodes = ParseTimecodeLine(timeCodeLine);
                var lastStart     = lastTimeCodes.Item1;
                var lastEnd       = lastTimeCodes.Item2;
                if (lastStart > 0 && lastEnd > 0 && textLines.Any())
                {
                    items.Add(new SubtitleItem
                    {
                        StartTime = lastStart,
                        EndTime   = lastEnd,
                        Text      = ConvertString(string.Join("\r\n", textLines))
                    });
                }

                if (!items.Any())
                {
                    return false;
                }

                result = Filters.RemoveDuplicateItems(items);
                return true;
            }
        }
Пример #17
0
        /// <summary>
        /// Formats a single source file and reports the outcome on the console, updating the
        /// shared counters. Generated files, files under <c>obj</c> and <c>AllInOne.cs</c> are skipped.
        /// </summary>
        /// <param name="file">Path of the file to format.</param>
        /// <param name="path">Optional prefix whose length is cut off the file path in console output.</param>
        /// <param name="validate">When true, the formatted code is compared against the original syntax tree.</param>
        /// <param name="check">When true, the file is only checked for formatting and never written.</param>
        /// <param name="cancellationToken">Token used to cancel the work.</param>
        private static async Task DoWork(
            string file,
            string?path,
            bool validate,
            bool check,
            CancellationToken cancellationToken)
        {
            // File-name suffixes are identifiers, not linguistic text, so compare them ordinally.
            if (
                file.EndsWith(".g.cs", StringComparison.Ordinal) ||
                file.EndsWith(".cshtml.cs", StringComparison.Ordinal) ||
                file.ContainsIgnoreCase("\\obj\\") ||
                file.ContainsIgnoreCase("/obj/") ||
                file.EndsWithIgnoreCase("AllInOne.cs")
                )
            {
                return;
            }

            cancellationToken.ThrowIfCancellationRequested();

            // Release the read handle before anything else touches the file.
            string code;
            using (var reader = new StreamReader(file))
            {
                code = await reader.ReadToEndAsync();
            }

            // Detection can fail (no Detected) or yield a charset without an Encoding instance.
            // Fall back to UTF-8 without a byte order mark so the write below neither throws
            // nor adds a BOM to a file that had none.
            var encoding = CharsetDetector.DetectFromFile(file).Detected?.Encoding
                           ?? new System.Text.UTF8Encoding(false);

            cancellationToken.ThrowIfCancellationRequested();

            CSharpierResult result;

            string GetPath()
            {
                return PadToSize(file.Substring(path?.Length ?? 0));
            }

            try
            {
                result = await new CodeFormatter().FormatAsync(
                    code,
                    new Options(),
                    cancellationToken
                    );
            }
            catch (OperationCanceledException)
            {
                // Cancellation is not a formatting failure; let it propagate.
                throw;
            }
            catch (Exception ex)
            {
                Interlocked.Increment(ref files);
                Console.WriteLine(
                    GetPath() + " - threw exception while formatting"
                    );
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
                Console.WriteLine();
                Interlocked.Increment(ref exceptionsFormatting);
                return;
            }

            if (result.Errors.Any())
            {
                Interlocked.Increment(ref files);
                Console.WriteLine(GetPath() + " - failed to compile");
                return;
            }

            if (!result.FailureMessage.IsBlank())
            {
                Interlocked.Increment(ref files);
                Console.WriteLine(GetPath() + " - " + result.FailureMessage);
                return;
            }

            if (validate)
            {
                var syntaxNodeComparer = new SyntaxNodeComparer(
                    code,
                    result.Code,
                    cancellationToken
                    );

                try
                {
                    var failure =
                        await syntaxNodeComparer.CompareSourceAsync(
                            cancellationToken
                            );

                    if (!string.IsNullOrEmpty(failure))
                    {
                        Interlocked.Increment(ref sourceLost);
                        Console.WriteLine(
                            GetPath() + " - failed syntax tree validation"
                            );
                        Console.WriteLine(failure);
                    }
                }
                catch (Exception ex)
                {
                    Interlocked.Increment(ref exceptionsValidatingSource);
                    Console.WriteLine(
                        GetPath() + " - failed with exception during syntax tree validation" + Environment.NewLine + ex.Message + ex.StackTrace
                        );
                }
            }

            if (check)
            {
                if (result.Code != code)
                {
                    Console.WriteLine(GetPath() + " - was not formatted");
                    Interlocked.Increment(ref unformattedFiles);
                }
            }

            cancellationToken.ThrowIfCancellationRequested();
            Interlocked.Increment(ref files);

            if (!check)
            {
                // purposely avoid async here, that way the file completely writes if the process gets cancelled while running.
                File.WriteAllText(file, result.Code, encoding);
            }
        }
Пример #18
0
        /// <summary>
        /// Reads the subtitle file at <paramref name="path"/> and parses every line from the
        /// first one accepted by <c>IsMicroDvdLine</c> onwards into subtitle items.
        /// </summary>
        /// <param name="path">Path of the subtitle file to read.</param>
        /// <param name="result">
        /// The parsed items after <c>Filters.RemoveDuplicateItems</c>, or <c>null</c> when the
        /// stream is unusable or no item was parsed.
        /// </param>
        /// <returns><c>true</c> when at least one item was parsed; otherwise <c>false</c>.</returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect   = CharsetDetector.DetectFromFile(path);
            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

            var items = new List <SubtitleItem>();

            // One reader owns the file handle and is disposed on every exit path; the
            // previous code opened the file through a throw-away reader and never closed it.
            using (var reader = new StreamReader(path, encoding, true))
            {
                var subStream = reader.BaseStream;
                if (!subStream.CanRead || !subStream.CanSeek)
                {
                    result = null;
                    return(false);
                }

                // Skip everything that precedes the first recognised subtitle line.
                var line = reader.ReadLine();
                while (line != null && !IsMicroDvdLine(line))
                {
                    line = reader.ReadLine();
                }

                if (line != null)
                {
                    var   firstItem = ParseLine(line, DefaultFrameRate);
                    float frameRate;

                    if (firstItem.Text != null && firstItem.Text.Any())
                    {
                        // When the first line's text yields a frame rate, that line is treated
                        // as a declaration and is not kept as a subtitle.
                        if (!TryExtractFrameRate(firstItem.Text, out frameRate))
                        {
                            frameRate = DefaultFrameRate;
                            items.Add(firstItem);
                        }
                    }
                    else
                    {
                        frameRate = DefaultFrameRate;
                    }

                    while ((line = reader.ReadLine()) != null)
                    {
                        if (!string.IsNullOrEmpty(line))
                        {
                            items.Add(ParseLine(line, frameRate));
                        }
                    }
                }
            }

            if (items.Any())
            {
                result = Filters.RemoveDuplicateItems(items);
                return(true);
            }

            result = null;
            return(false);
        }
Пример #19
0
        /// <summary>
        /// Reads the subtitle file at <paramref name="path"/>, locates the line equal to
        /// <c>EventLine</c>, interprets the following line as the column header and parses
        /// every subsequent non-empty line into a subtitle item.
        /// </summary>
        /// <param name="path">Path of the subtitle file to read.</param>
        /// <param name="result">
        /// The parsed items after <c>Filters.RemoveDuplicateItems</c>, or <c>null</c> when the
        /// file could not be parsed or produced no item.
        /// </param>
        /// <returns><c>true</c> when at least one item was parsed; otherwise <c>false</c>.</returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect   = CharsetDetector.DetectFromFile(path);
            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

            // Every failure path below returns false with a null result.
            result = null;

            // One reader owns the file handle and is disposed on every exit path; the
            // previous code opened the file through a throw-away reader and never closed it.
            using (var reader = new StreamReader(path, encoding, true))
            {
                var ssaStream = reader.BaseStream;
                if (!ssaStream.CanRead || !ssaStream.CanSeek)
                {
                    return(false);
                }

                // Skip ahead to the events marker.
                var line = reader.ReadLine();
                while (line != null && line != EventLine)
                {
                    line = reader.ReadLine();
                }

                if (line == null)
                {
                    return(false);
                }

                var headerLine = reader.ReadLine();
                if (string.IsNullOrEmpty(headerLine))
                {
                    return(false);
                }

                var columnHeaders = headerLine.Split(Separator).Select(head => head.Trim()).ToList();

                var startIndexColumn = columnHeaders.IndexOf(StartColumn);
                var endIndexColumn   = columnHeaders.IndexOf(EndColumn);
                var textIndexColumn  = columnHeaders.IndexOf(TextColumn);

                // All three columns must be present at an index greater than zero.
                if (startIndexColumn <= 0 || endIndexColumn <= 0 || textIndexColumn <= 0)
                {
                    return(false);
                }

                var items = new List <SubtitleItem>();

                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    var columns = line.Split(Separator);

                    // Lines with fewer columns than the header promised cannot carry both
                    // timecodes; skip them instead of indexing past the end of the array.
                    if (columns.Length <= startIndexColumn || columns.Length <= endIndexColumn)
                    {
                        continue;
                    }

                    var startText = columns[startIndexColumn];
                    var endText   = columns[endIndexColumn];

                    // The text column is the remainder of the line, so separators that were
                    // split out of the text are joined back with commas.
                    var textLine = string.Join(",", columns.Skip(textIndexColumn));

                    var start = ParseSsaTimecode(startText);
                    var end   = ParseSsaTimecode(endText);

                    if (start > 0 && end > 0 && !string.IsNullOrEmpty(textLine))
                    {
                        var item = new SubtitleItem
                        {
                            StartTime = start, EndTime = end, Text = ConvertString(textLine)
                        };
                        items.Add(item);
                    }
                }

                if (!items.Any())
                {
                    return(false);
                }

                result = Filters.RemoveDuplicateItems(items);
                return(true);
            }
        }
Пример #20
0
        /// <summary>
        /// Reads the subtitle file at <paramref name="path"/> as SAMI-style markup: the first
        /// line must be the SAMI opening tag, a BODY opening line must follow, and each tag
        /// line inside the body starts a cue whose text runs until the next SYNC line.
        /// </summary>
        /// <param name="path">Path of the subtitle file to read.</param>
        /// <param name="result">
        /// The parsed items after <c>Filters.RemoveDuplicateItems</c>, or <c>null</c> when the
        /// opening SAMI or BODY line is missing.
        /// </param>
        /// <returns>
        /// <c>true</c> once the body was reached (even if it held no cue); otherwise <c>false</c>.
        /// </returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect   = CharsetDetector.DetectFromFile(path);
            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

            var items = new List <SubtitleItem>();

            // The using block closes the reader on every exit path, including exceptions
            // thrown while parsing a malformed tag line.
            using (var sr = new StreamReader(path, encoding))
            {
                var line = sr.ReadLine();

                if (line == null || !line.Equals("<SAMI>"))
                {
                    result = null;
                    return(false);
                }

                while ((line = sr.ReadLine()) != null)
                {
                    if (line.Equals("<BODY>"))
                    {
                        break;
                    }
                }

                // Null here means the file ended before a body was found.
                if (string.IsNullOrEmpty(line))
                {
                    result = null;
                    return(false);
                }

                // True when `line` already holds the next unprocessed line (the SYNC line
                // that terminated the previous cue), so no new line must be read.
                var hasPendingLine = false;
                var sb             = new StringBuilder();
                var sbComment      = false;

                while (!string.IsNullOrEmpty(line))
                {
                    if (!hasPendingLine)
                    {
                        // Skip blank lines, but stop at end of file: ReadLine keeps returning
                        // null there, which previously made this skip loop spin forever when
                        // the closing tags were missing.
                        do
                        {
                            line = sr.ReadLine();
                        }
                        while (line != null && line.Length == 0);

                        if (line == null)
                        {
                            break;
                        }
                    }
                    else
                    {
                        hasPendingLine = false;
                    }

                    // Single-line comments are ignored entirely.
                    if (line.Contains("<--") && line.Contains("-->"))
                    {
                        continue;
                    }

                    if (line.Contains("<!--") && line.Contains("-->"))
                    {
                        continue;
                    }

                    // Multi-line comments: ignore lines from the opener up to, but not
                    // including, the line that carries the closer.
                    if (line.Contains("<!--"))
                    {
                        sbComment = true;
                    }

                    if (line.Contains("-->"))
                    {
                        sbComment = false;
                    }

                    if (sbComment)
                    {
                        continue;
                    }

                    if (line.Contains("</BODY>"))
                    {
                        break;
                    }

                    if (line.Contains("</SAMI>"))
                    {
                        break;
                    }

                    if (line[0].Equals('<'))
                    {
                        // Content of the first tag, split around '='; the part after '=' is
                        // parsed as the cue start time. The tag that follows on the same line
                        // was never used, so it is no longer extracted.
                        var tagEnd = line.IndexOf('>');
                        var miSync = line.Substring(1, tagEnd - 1).Split('=');

                        // Collect the cue text up to the next SYNC line or end of file. The
                        // comparison is already case-insensitive, so no upper-casing is needed.
                        while ((line = sr.ReadLine())?.Contains("<SYNC", StringComparison.OrdinalIgnoreCase) ==
                               false)
                        {
                            sb.Append(line);
                        }

                        items.Add(new SubtitleItem(int.Parse(miSync[1]), ConvertString(sb.ToString())));

                        sb.Clear();

                        hasPendingLine = true;
                    }
                }
            }

            // Each cue ends where the next one starts; the last cue ends 1000 after its start.
            for (var i = 0; i < items.Count; i++)
            {
                var endTime = i == items.Count - 1
                    ? items[i].StartTime + 1000
                    : items[i + 1].StartTime;

                items[i].EndTime = endTime;
            }

            result = Filters.RemoveDuplicateItems(items);
            return(true);
        }
Пример #21
0
        /// <summary>
        /// Reads the subtitle file at <paramref name="path"/>, splits it into parts with
        /// <c>GetVttSubTitleParts</c> and turns each part that carries a timecode line and
        /// text into a subtitle item.
        /// </summary>
        /// <param name="path">Path of the subtitle file to read.</param>
        /// <param name="result">
        /// The parsed items after <c>Filters.RemoveDuplicateItems</c>, or <c>null</c> when the
        /// stream is unusable or no part was found.
        /// </param>
        /// <returns><c>true</c> when at least one part was found; otherwise <c>false</c>.</returns>
        public bool ParseFormat(string path, out List <SubtitleItem> result)
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var detect   = CharsetDetector.DetectFromFile(path);
            var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

            // One reader owns the file handle and is disposed on every exit path; the
            // previous code opened the file through a throw-away reader and never closed it.
            using (var reader = new StreamReader(path, encoding, true))
            {
                var vttStream = reader.BaseStream;
                if (!vttStream.CanRead || !vttStream.CanSeek)
                {
                    result = null;
                    return(false);
                }

                // Materialise the parts while the reader is still open.
                var vttSubParts = GetVttSubTitleParts(reader).ToList();

                if (!vttSubParts.Any())
                {
                    result = null;
                    return(false);
                }

                var items = new List <SubtitleItem>();

                foreach (var vttSubPart in vttSubParts)
                {
                    var lines =
                        vttSubPart.Split(new[] { Environment.NewLine }, StringSplitOptions.None)
                        .Select(s => s.Trim())
                        .Where(l => !string.IsNullOrEmpty(l))
                        .ToList();

                    var item = new SubtitleItem();
                    foreach (var line in lines)
                    {
                        if (item.StartTime == 0 && item.EndTime == 0)
                        {
                            // No timecode seen yet: lines that do not parse as one are ignored.
                            int startTc;
                            int endTc;
                            if (TryParseTimecodeLine(line, out startTc, out endTc))
                            {
                                item.StartTime = startTc;
                                item.EndTime   = endTc;
                            }
                        }
                        else
                        {
                            // NOTE(review): each text line overwrites the previous one, so a
                            // multi-line part keeps only its last line - confirm this is intended.
                            item.Text = ConvertString(line);
                        }

                        // Normalise a missing text to the empty string.
                        item.Text = string.IsNullOrEmpty(item.Text) ? "" : item.Text;
                    }

                    if ((item.StartTime != 0 || item.EndTime != 0) && item.Text.Any())
                    {
                        items.Add(item);
                    }
                }

                result = Filters.RemoveDuplicateItems(items);
                return(true);
            }
        }