public void RunTest(string fileName)
{
    var filePath = Path.Combine(
        this.rootDirectory.FullName,
        "EncodingTests",
        fileName + ".cst"
    );

    // Read the original source and record its detected on-disk encoding.
    using var reader = new StreamReader(filePath, Encoding.UTF8, true);
    var code = reader.ReadToEnd();
    var detectionResult = CharsetDetector.DetectFromFile(filePath);
    var encoding = detectionResult.Detected.Encoding;
    reader.Close();

    var formatter = new CodeFormatter();
    var result = formatter.Format(code, new Options());

    var actualFilePath = filePath.Replace(".cst", ".actual.cst");

    // BUG FIX: the writer must be flushed and closed BEFORE detection runs,
    // otherwise the .actual file is empty/partial at detection time. Scoped
    // using-statements (instead of using-declarations that dispose at method
    // end) guarantee that ordering.
    using (var stream = File.Open(actualFilePath, FileMode.Create))
    using (var writer = new StreamWriter(stream, encoding))
    {
        writer.Write(result.Code);
    }

    // BUG FIX: the original re-detected `filePath` (the input file), so the
    // assertion compared the input's encoding with itself and always passed.
    // Detect the file the formatter actually produced.
    var actualDetectionResult = CharsetDetector.DetectFromFile(actualFilePath);
    var actualEncoding = actualDetectionResult.Detected.Encoding;
    encoding.Should().Be(actualEncoding);
}
/// <summary>
/// Parses a YouTube XML subtitle file into a list of subtitle items.
/// </summary>
/// <param name="path">Path to the subtitle file on disk.</param>
/// <param name="result">The parsed, de-duplicated items, or null on failure.</param>
/// <returns>true when at least one item was parsed; otherwise false.</returns>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages (e.g. windows-125x) resolvable by name.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    // BUG FIX: the StreamReader (and its underlying FileStream) was never
    // disposed; `using` releases the file handle when this method returns.
    using var reader = new StreamReader(path, encoding);
    var xmlStream = reader.BaseStream;

    // rewind the stream
    xmlStream.Position = 0;

    var items = new List<SubtitleItem>();

    // parse xml stream
    var xmlDoc = new XmlDocument();
    xmlDoc.Load(xmlStream);

    if (xmlDoc.DocumentElement != null)
    {
        var nodeList = xmlDoc.DocumentElement.SelectNodes("//text");
        if (nodeList != null)
        {
            for (var i = 0; i < nodeList.Count; i++)
            {
                var node = nodeList[i];
                try
                {
                    // start/dur are fractional seconds; parse culture-invariantly.
                    var startString = node.Attributes["start"].Value;
                    var start = float.Parse(startString, CultureInfo.InvariantCulture);
                    var durString = node.Attributes["dur"].Value;
                    var duration = float.Parse(durString, CultureInfo.InvariantCulture);
                    var text = node.InnerText;
                    items.Add(new SubtitleItem
                    {
                        StartTime = (int)(start * 1000),
                        EndTime = (int)((start + duration) * 1000),
                        Text = ConvertString(text)
                    });
                }
                catch
                {
                    // Any malformed node aborts the whole parse.
                    result = null;
                    return false;
                }
            }
        }
    }

    if (items.Any())
    {
        result = Filters.RemoveDuplicateItems(items);
        return true;
    }

    result = null;
    return false;
}
/// <summary>
/// Command line example: detect the encoding of the given file.
/// </summary>
/// <param name="filename">a filename</param>
public static void DetectDemo(string filename)
{
    // Run detection and grab the best candidate (may be null).
    var detail = CharsetDetector.DetectFromFile(filename).Detected;

    if (detail == null)
    {
        Console.WriteLine($"Detection failed: (unknown)");
        return;
    }

    var name = detail.EncodingName;          // alias of the found encoding
    var detectedEncoding = detail.Encoding;  // null when .NET has no matching Encoding
    var score = detail.Confidence;           // between 0 and 1

    Console.WriteLine($"Detection completed: (unknown)");
    Console.WriteLine(detectedEncoding != null
        ? $"EncodingWebName: {detectedEncoding.WebName}{Environment.NewLine}Confidence: {score}"
        : $"(Encoding is null){Environment.NewLine}EncodingName: {name}{Environment.NewLine}Confidence: {score}");
}
/// <summary>
/// Asserts that the detector reports <paramref name="expectedCharset"/> for
/// <paramref name="file"/> and that a usable .NET Encoding was resolved.
/// </summary>
private static void TestFile(string expectedCharset, string file)
{
    var detected = CharsetDetector.DetectFromFile(file).Detected;

    StringAssert.AreEqualIgnoringCase(
        expectedCharset,
        detected.EncodingName,
        $"Charset detection failed for {file}. Expected: {expectedCharset}, detected: {detected.EncodingName} ({detected.Confidence * 100}% confidence)");
    Assert.NotNull(detected.Encoding);
}
/// <summary>
/// Runs charset detection on <paramref name="file"/>, logs the full detection
/// result as JSON, and asserts the detected name matches
/// <paramref name="expectedCharset"/> (case-insensitively).
/// </summary>
private void TestFile(string expectedCharset, string file)
{
    var detectionResult = CharsetDetector.DetectFromFile(file);
    var best = detectionResult.Detected;

    var serialized = JsonConvert.SerializeObject(detectionResult, Formatting.Indented, new EncodingJsonConverter());
    _logWriter.WriteLine($"- {file} ({expectedCharset}) -> {serialized}");

    var failureMessage =
        $"Charset detection failed for {file}. Expected: {expectedCharset}, detected: {best.EncodingName} ({best.Confidence * 100.0f:0.00############}% confidence)";
    StringAssert.AreEqualIgnoringCase(expectedCharset, best.EncodingName, failureMessage);
    Assert.NotNull(best.Encoding);
}
/// <summary>
/// Parses a TTML (Timed Text) subtitle file into a list of subtitle items.
/// </summary>
/// <param name="path">Path to the subtitle file on disk.</param>
/// <param name="result">The parsed, de-duplicated items, or null on failure.</param>
/// <returns>true when at least one item was parsed; otherwise false.</returns>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages resolvable by name.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    // BUG FIX: the StreamReader (and its underlying FileStream) was never
    // disposed; `using` releases the file handle when this method returns.
    using var fileReader = new StreamReader(path, encoding);
    var xmlStream = fileReader.BaseStream;
    xmlStream.Position = 0;

    var items = new List<SubtitleItem>();
    var xElement = XElement.Load(xmlStream);
    // TTML content may use either an explicit "tt" prefix or a default namespace.
    var tt = xElement.GetNamespaceOfPrefix("tt") ?? xElement.GetDefaultNamespace();
    var nodeList = xElement.Descendants(tt + "p").ToList();

    foreach (var node in nodeList)
    {
        try
        {
            var reader = node.CreateReader();
            reader.MoveToContent();
            // "begin"/"end" values may carry a "t" (ticks) marker; strip it.
            var beginString = node.Attribute("begin").Value.Replace("t", "");
            var startTicks = ParseTimecode(beginString);
            var endString = node.Attribute("end").Value.Replace("t", "");
            var endTicks = ParseTimecode(endString);
            // Strip the namespace noise the XmlReader adds to the inner XML.
            var text = reader.ReadInnerXml()
                .Replace("<tt:", "<")
                .Replace("</tt:", "</")
                .Replace(string.Format(@" xmlns:tt=""{0}""", tt), "")
                .Replace(string.Format(@" xmlns=""{0}""", tt), "");
            items.Add(new SubtitleItem
            {
                StartTime = (int)startTicks,
                EndTime = (int)endTicks,
                Text = ConvertString(text)
            });
        }
        catch
        {
            // Any malformed cue aborts the whole parse.
            result = null;
            return false;
        }
    }

    if (items.Any())
    {
        result = Filters.RemoveDuplicateItems(items);
        return true;
    }

    result = null;
    return false;
}
/// <summary>
/// Detects the text encoding of the file at <paramref name="path"/>.
/// Falls back to the best-ranked detection detail (preferring details that
/// map to a real <see cref="Encoding"/>, then by confidence), and finally
/// to <see cref="Encoding.Default"/>.
/// </summary>
public static Encoding GetEncoding(this string path)
{
    var detection = CharsetDetector.DetectFromFile(path);

    var best = detection.Detected;
    if (best == null)
    {
        // No outright winner: rank the candidate details ourselves.
        best = detection.Details
            .OrderByDescending(d => d.Encoding != default)
            .ThenByDescending(d => d.Confidence)
            .FirstOrDefault();
    }

    return best?.Encoding ?? Encoding.Default;
}
/// <summary>
/// Detects this file's encoding on a worker thread and, when a usable
/// <see cref="System.Text.Encoding"/> is found, publishes the detection
/// results and enables conversion. Leaves state untouched otherwise.
/// </summary>
public async Task DetectAsync()
{
    var detectionResult = await Task.Run(() => CharsetDetector.DetectFromFile(this.Path));

    var detail = detectionResult.Detected;
    if (detail?.Encoding == null)
    {
        return; // nothing usable detected
    }

    this.EncodingName = detail.EncodingName;
    this.Encoding = detail.Encoding;
    this.IsEnabledConvert = true;
}
/// <summary>
/// Detection test for charsets without a corresponding .NET Encoding:
/// logs the raw detection result and checks only the reported encoding
/// name (no Assert.NotNull on the Encoding, by design).
/// </summary>
public void TestFileUnsupportedEncodings(TestCase testCase)
{
    var inputPath = testCase.InputFile.FullName;
    var expected = testCase.ExpectedEncoding;

    var result = CharsetDetector.DetectFromFile(inputPath);
    var detected = result.Detected;

    var serialized = JsonConvert.SerializeObject(result, Formatting.Indented, new EncodingJsonConverter());
    _logWriter.WriteLine($"- {inputPath} ({expected}) -> {serialized}");

    StringAssert.AreEqualIgnoringCase(
        expected,
        detected.EncodingName,
        $"Charset detection failed for {inputPath}. " +
        $"Expected: {expected}. " +
        $"Detected: {detected.EncodingName} " +
        $"({detected.Confidence * 100.0f:0.00############}% confidence).");
}
/// <summary>
/// Runs charset detection over every file in DATA_ROOT/<paramref name="dirname"/>
/// and asserts each one is detected exactly as <paramref name="charset"/>.
/// Silently returns when the directory does not exist.
/// </summary>
private static void Process(string charset, string dirname)
{
    var directory = Path.Combine(DATA_ROOT, dirname);
    if (!Directory.Exists(directory))
    {
        return;
    }

    foreach (var file in Directory.GetFiles(directory))
    {
        var detected = CharsetDetector.DetectFromFile(file).Detected;

        var message = string.Format(
            "Charset detection failed for {0}. Expected: {1}, detected: {2} ({3}% confidence)",
            file, charset, detected.EncodingName, detected.Confidence * 100);
        Assert.True(charset == detected.EncodingName, message);
        Assert.NotNull(detected.Encoding);
    }
}
/// <summary>
/// Detects the encoding of <paramref name="fileName"/>. When detection fails
/// outright or confidence is below 0.7, falls back to GB18030 (or GB2312
/// when the GB18030 code page is unavailable on this runtime).
/// </summary>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>The detected or fallback <see cref="Encoding"/>.</returns>
public static Encoding GetEncodingType(string fileName)
{
    DetectionResult result = CharsetDetector.DetectFromFile(fileName);
    DetectionDetail resultDetected = result.Detected;

    // BUG FIX: Detected can be null when detection fails entirely; the
    // original dereferenced it unconditionally and threw a
    // NullReferenceException. Treat "no detection" like "low confidence".
    if (resultDetected == null || resultDetected.Confidence < 0.7)
    {
        try
        {
            return Encoding.GetEncoding("GB18030");
        }
        catch
        {
            // GB18030 not registered (e.g. no CodePagesEncodingProvider).
            return Encoding.GetEncoding("GB2312");
        }
    }

    // NOTE(review): Encoding can still be null for charsets .NET cannot
    // map even at high confidence — callers appear to accept that; confirm
    // before hardening further.
    Encoding encoding = resultDetected.Encoding;
    return encoding;
}
/// <summary>
/// Builds a <see cref="FileInfoDto"/> for the file or directory at
/// <paramref name="fullpath"/>. Directories get only name/path metadata;
/// files additionally get size, timestamps, URL, MIME type and (optionally)
/// their text content decoded with the detected charset.
/// </summary>
/// <param name="fullpath">Absolute path to a file or directory.</param>
/// <param name="withContent">When true, file content is read into the DTO.</param>
private async Task<FileInfoDto> GetFileInfo(string fullpath, bool withContent = true)
{
    var attributes = File.GetAttributes(fullpath);

    FileInfoDto fileInfoDto = new FileInfoDto
    {
        Size = 0,
        FullPath = fullpath
    };

    if ((attributes & FileAttributes.Directory) == FileAttributes.Directory)
    {
        // Directory: only name and path relative to basePath are filled in.
        fileInfoDto.Dir = true;
        fileInfoDto.Name = Path.GetFileName(fullpath);
        fileInfoDto.Path = Path.GetRelativePath(basePath, Directory.GetParent(fullpath).FullName);
    }
    else
    {
        FileInfo fileInfo = new FileInfo(fullpath);
        fileInfoDto.Name = fileInfo.Name;
        fileInfoDto.Path = Path.GetRelativePath(basePath, Directory.GetParent(fullpath).FullName);
        fileInfoDto.Size = fileInfo.Length;
        fileInfoDto.UpdateTime = fileInfo.LastWriteTime;
        fileInfoDto.Url = new Uri(fileInfo.FullName).AbsoluteUri;
        fileInfoDto.ContentType = GetMimeType(fileInfo.FullName);

        if (withContent)
        {
            // Detect the charset so the text is decoded correctly; when
            // detection fails the file is treated as unsupported binary.
            var charsetResult = CharsetDetector.DetectFromFile(fileInfo);
            if (charsetResult.Detected != null)
            {
                // NOTE(review): Detected.Encoding can be null for charsets
                // .NET cannot map — confirm ReadAllTextAsync tolerates that.
                var encoding = charsetResult.Detected.Encoding;
                fileInfoDto.Content = await File.ReadAllTextAsync(fileInfo.FullName, encoding);
            }
            else
            {
                fileInfoDto.Content = "not support Blob file";
            }
        }
    }

    return (fileInfoDto);
}
/// <summary>
/// Command line example: detects the encoding of the given file.
/// </summary>
/// <param name="args">a filename</param>
public static void Main(String[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Usage: udetect <filename>");
        return;
    }

    var detected = CharsetDetector.DetectFromFile(args[0]).Detected;
    if (detected == null)
    {
        Console.WriteLine("Detection failed.");
    }
    else
    {
        Console.WriteLine("Charset: {0}, confidence: {1}", detected.EncodingName, detected.Confidence);
    }
}
/// <summary>
/// Command line example: detects the encoding of the given file.
/// </summary>
/// <param name="args">a filename</param>
public static void Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Usage: ConsoleExample <filename>");
        return;
    }

    var filename = args[0];
    if (!File.Exists(filename))
    {
        Console.WriteLine($"File not found: (unknown)");
        return;
    }

    var result = CharsetDetector.DetectFromFile(filename);

    // BUG FIX: Detected.Encoding is null when the detected charset has no
    // .NET Encoding; the original crashed on .WebName in that case. Fall
    // back to the detector's own encoding name.
    var message = result.Detected != null
        ? $"Detected encoding {result.Detected.Encoding?.WebName ?? result.Detected.EncodingName} with confidence {result.Detected.Confidence}."
        : $"Detection failed: (unknown)";
    Console.WriteLine(message);
}
/// <summary>
/// Test fixture setup: scans the repository's workspaces for translation
/// JSON files and JS/JSX sources, collecting translation items, encoding
/// problems, parse failures, used translation keys, untranslated toast
/// messages, and per-module summaries for later assertions.
/// </summary>
public void Setup()
{
    // Collect parse failures and wrong-encoding findings for later assertions.
    ParseJsonErrors = new List<ParseJsonError>();
    WrongEncodingJsonErrors = new List<JsonEncodingError>();

    // Read the workspace list from package.json; paths are normalized to
    // Windows separators; asc-web-components is excluded.
    var packageJsonPath = Path.Combine(BasePath, @"package.json");
    var jsonPackage = JObject.Parse(File.ReadAllText(packageJsonPath));
    var moduleWorkspaces = ((JArray)jsonPackage["workspaces"])
        .Select(p => ((string)p).Replace("/", "\\"))
        .Where(w => !w.Contains("asc-web-components"))
        .ToList();

    Workspaces = new List<string>();
    Workspaces.AddRange(moduleWorkspaces);
    Workspaces.Add("public\\locales");

    // Every *.json under a workspace's public\locales\ folder is a translation file.
    var translationFiles = from wsPath in Workspaces
                           let clientDir = Path.Combine(BasePath, wsPath)
                           from filePath in Directory.EnumerateFiles(clientDir, "*.json", SearchOption.AllDirectories)
                           where filePath.Contains("public\\locales\\")
                           select Path.GetFullPath(filePath);

    TranslationFiles = new List<TranslationFile>();

    foreach (var path in translationFiles)
    {
        try
        {
            // Flag files whose detected charset is neither utf-8 nor ascii.
            var result = CharsetDetector.DetectFromFile(path);
            if (result.Detected.EncodingName != "utf-8"
                && result.Detected.EncodingName != "ascii")
            {
                WrongEncodingJsonErrors.Add(
                    new JsonEncodingError(path, result.Detected));
            }

            // Hash the raw bytes (used as a change marker), then rewind the
            // same stream and parse it as UTF-8 JSON.
            using (var md5 = MD5.Create())
            {
                using (var stream = File.OpenRead(path))
                {
                    var hash = md5.ComputeHash(stream);
                    var md5hash = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
                    stream.Position = 0;
                    using var sr = new StreamReader(stream, Encoding.UTF8);
                    {
                        var jsonTranslation = JObject.Parse(sr.ReadToEnd());
                        // Each top-level property becomes one key/value translation item.
                        var translationFile = new TranslationFile(path, jsonTranslation.Properties()
                            .Select(p => new TranslationItem(p.Name, (string)p.Value))
                            .ToList(), md5hash);
                        TranslationFiles.Add(translationFile);
                    }
                }
            }

            /* Re-write by order */
            //var orderedList = jsonTranslation.Properties().OrderBy(t => t.Name);
            //var result = new JObject(orderedList);
            //var sortedJsonString = JsonConvert.SerializeObject(result, Formatting.Indented);
            //File.WriteAllText(path, sortedJsonString);
        }
        catch (Exception ex)
        {
            // A broken file is recorded, not fatal for the scan.
            ParseJsonErrors.Add(new ParseJsonError(path, ex));
            Debug.WriteLine($"File 
path = {path} failed to parse with error: {ex.Message}");
        }
    }

    // Gather all .js/.jsx sources outside dist\ folders.
    var javascriptFiles = (from wsPath in Workspaces
                           let clientDir = Path.Combine(BasePath, wsPath)
                           from file in Directory.EnumerateFiles(clientDir, "*.js", SearchOption.AllDirectories)
                           where !file.Contains("dist\\")
                           select file)
        .ToList();
    javascriptFiles.AddRange(from wsPath in Workspaces
                             let clientDir = Path.Combine(BasePath, wsPath)
                             from file in Directory.EnumerateFiles(clientDir, "*.jsx", SearchOption.AllDirectories)
                             where !file.Contains("dist\\")
                             select file);

    JavaScriptFiles = new List<JavaScriptFile>();

    // pattern1: t("key")-style calls; pattern2: i18nKey="key" JSX attributes.
    var pattern1 = "[.{\\s\\(]t\\(\\s*[\"\'`]([a-zA-Z0-9_.:_\\s{}/_-]+)[\"\'`]\\s*[\\),]";
    var pattern2 = "i18nKey=\"([a-zA-Z0-9_.-]+)\"";
    var regexp = new Regex($"({pattern1})|({pattern2})", RegexOptions.Multiline | RegexOptions.ECMAScript);

    // Matches toastr.* calls whose message is a raw string literal (i.e. not translated).
    var notTranslatedToastsRegex = new Regex("(?<=toastr.info\\([\"`\'])(.*)(?=[\"\'`])" +
        "|(?<=toastr.error\\([\"`\'])(.*)(?=[\"\'`])" +
        "|(?<=toastr.success\\([\"`\'])(.*)(?=[\"\'`])" +
        "|(?<=toastr.warn\\([\"`\'])(.*)(?=[\"\'`])",
        RegexOptions.Multiline | RegexOptions.ECMAScript);

    NotTranslatedToasts = new List<KeyValuePair<string, string>>();

    foreach (var path in javascriptFiles)
    {
        var jsFileText = File.ReadAllText(path);

        // Record each distinct untranslated toast message once (first file wins).
        var toastMatches = notTranslatedToastsRegex.Matches(jsFileText).ToList();
        if (toastMatches.Any())
        {
            foreach (var toastMatch in toastMatches)
            {
                var found = toastMatch.Value;
                if (!string.IsNullOrEmpty(found) && !NotTranslatedToasts.Exists(t => t.Value == found))
                {
                    NotTranslatedToasts.Add(new KeyValuePair<string, string>(path, found));
                }
            }
        }

        // Group 2 holds pattern1 captures, group 4 holds pattern2 captures.
        var matches = regexp.Matches(jsFileText);
        var translationKeys = matches
            .Select(m => m.Groups[2].Value == "" ? 
                m.Groups[4].Value : m.Groups[2].Value)
            .ToList();

        if (!translationKeys.Any())
        {
            continue;
        }

        var jsFile = new JavaScriptFile(path);
        jsFile.TranslationKeys = translationKeys;
        JavaScriptFiles.Add(jsFile);
    }

    ModuleFolders = new List<ModuleFolder>();

    // Pair each translation file with the workspace it belongs to.
    var list = TranslationFiles
        .Select(t => new
        {
            ModulePath = moduleWorkspaces.FirstOrDefault(m => t.FilePath.Contains(m)),
            Language = new LanguageItem
            {
                Path = t.FilePath,
                Language = t.Language,
                Translations = t.Translations
            },
            lng = t.Language
        }).ToList();

    // Group languages per module.
    var moduleTranslations = list
        .GroupBy(t => t.ModulePath)
        .Select(g => new
        {
            ModulePath = g.Key,
            Languages = g.ToList().Select(t => t.Language).ToList()
                .ToList()
        })
        .ToList();

    // Aggregate all translation keys used by each module's JS files.
    var moduleJsTranslatedFiles = JavaScriptFiles
        .Select(t => new
        {
            ModulePath = moduleWorkspaces.FirstOrDefault(m => t.Path.Contains(m)),
            t.Path,
            t.TranslationKeys
        })
        .GroupBy(t => t.ModulePath)
        .Select(g => new
        {
            ModulePath = g.Key,
            TranslationKeys = g.ToList().SelectMany(t => t.TranslationKeys).ToList()
        })
        .ToList();

    // Emit a ModuleFolder for every workspace that has translations
    // and/or JS translation key usages.
    foreach (var ws in moduleWorkspaces)
    {
        var t = moduleTranslations.FirstOrDefault(t => t.ModulePath == ws);
        var j = moduleJsTranslatedFiles.FirstOrDefault(t => t.ModulePath == ws);

        if (j == null && t == null)
        {
            continue;
        }

        ModuleFolders.Add(new ModuleFolder
        {
            Path = ws,
            AvailableLanguages = t?.Languages,
            AppliedJsTranslationKeys = j?.TranslationKeys
        });
    }

    // Translations living directly under public\locales are shared by all modules.
    CommonTranslations = TranslationFiles
        .Where(file => file.FilePath.StartsWith($"{BasePath}public\\locales"))
        .Select(t => new LanguageItem
        {
            Path = t.FilePath,
            Language = t.Language,
            Translations = t.Translations
        }).ToList();
}
/// <summary>
/// Parses an SRT subtitle file into a list of subtitle items.
/// </summary>
/// <param name="path">Path to the subtitle file on disk.</param>
/// <param name="result">The parsed, de-duplicated items, or null on failure.</param>
/// <returns>true when at least one item was parsed; otherwise false.</returns>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages resolvable by name.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    // BUG FIX: neither StreamReader was disposed, leaking the underlying
    // file handle; `using` closes them when the method returns.
    using var fileReader = new StreamReader(path, encoding);
    var subStream = fileReader.BaseStream;
    subStream.Position = 0;
    using var reader = new StreamReader(subStream, encoding, true);

    var firstLine = reader.ReadLine();
    if (firstLine == FirstLine)
    {
        // Skip ahead (within the first MaxLineNumberForItems lines) to the
        // first timestamp line.
        var line = reader.ReadLine();
        var lineNumber = 2;
        while (line != null && lineNumber <= MaxLineNumberForItems && !IsTimestampLine(line))
        {
            line = reader.ReadLine();
            lineNumber++;
        }

        if (line != null && lineNumber <= MaxLineNumberForItems && IsTimestampLine(line))
        {
            var items = new List<SubtitleItem>();
            var timeCodeLine = line;
            var textLines = new List<string>();

            while (line != null)
            {
                line = reader.ReadLine();
                if (IsTimestampLine(line))
                {
                    // A new timestamp closes the current cue.
                    var timeCodes = ParseTimecodeLine(timeCodeLine);
                    var start = timeCodes.Item1;
                    var end = timeCodes.Item2;
                    if (start > 0 && end > 0 && textLines.Any())
                    {
                        items.Add(new SubtitleItem
                        {
                            StartTime = start,
                            EndTime = end,
                            Text = ConvertString(string.Join("\r\n", textLines.ToArray()))
                        });
                    }
                    timeCodeLine = line;
                    textLines = new List<string>();
                }
                else
                {
                    if (!string.IsNullOrEmpty(line))
                    {
                        textLines.Add(line);
                    }
                }
            }

            // Flush the final cue (no trailing timestamp line terminates it).
            var lastTimeCodes = ParseTimecodeLine(timeCodeLine);
            var lastStart = lastTimeCodes.Item1;
            var lastEnd = lastTimeCodes.Item2;
            if (lastStart > 0 && lastEnd > 0 && textLines.Any())
            {
                items.Add(new SubtitleItem
                {
                    StartTime = lastStart,
                    EndTime = lastEnd,
                    Text = ConvertString(string.Join("\r\n", textLines.ToArray()))
                });
            }

            if (items.Any())
            {
                result = Filters.RemoveDuplicateItems(items);
                return true;
            }

            result = null;
            return false;
        }

        result = null;
        return false;
    }

    result = null;
    return false;
}
/// <summary>
/// Formats a single C# file with CSharpier, updating shared counters and
/// reporting problems to the console. Optionally validates that the
/// formatted output parses to the same syntax tree, or (in check mode)
/// only reports whether the file would change instead of rewriting it.
/// </summary>
/// <param name="file">Absolute path of the file to process.</param>
/// <param name="path">Root directory prefix stripped for display.</param>
/// <param name="validate">When true, compare syntax trees before/after.</param>
/// <param name="check">When true, report-only mode (no write-back).</param>
/// <param name="cancellationToken">Cooperative cancellation.</param>
private static async Task DoWork(
    string file,
    string? path,
    bool validate,
    bool check,
    CancellationToken cancellationToken)
{
    // Skip generated/compiled artifacts that should never be formatted.
    if (
        file.EndsWith(".g.cs")
        || file.EndsWith(".cshtml.cs")
        || file.ContainsIgnoreCase("\\obj\\")
        || file.ContainsIgnoreCase("/obj/")
        || file.EndsWithIgnoreCase("AllInOne.cs")
    )
    {
        return;
    }

    cancellationToken.ThrowIfCancellationRequested();

    // Read the source and remember its on-disk encoding so the rewrite at
    // the bottom preserves it.
    using var reader = new StreamReader(file);
    var code = await reader.ReadToEndAsync();
    var detectionResult = CharsetDetector.DetectFromFile(file);
    var encoding = detectionResult.Detected.Encoding;
    reader.Close();

    cancellationToken.ThrowIfCancellationRequested();

    CSharpierResult result;

    // Path relative to the supplied root, padded for aligned console output.
    string GetPath()
    {
        return (PadToSize(file.Substring(path?.Length ?? 0)));
    }

    try
    {
        result = await new CodeFormatter().FormatAsync(
            code,
            new Options(),
            cancellationToken
        );
    }
    catch (OperationCanceledException)
    {
        // Cancellation is not a formatting failure; let it propagate.
        throw;
    }
    catch (Exception ex)
    {
        Interlocked.Increment(ref files);
        Console.WriteLine(
            GetPath() + " - threw exception while formatting"
        );
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
        Console.WriteLine();
        Interlocked.Increment(ref exceptionsFormatting);
        return;
    }

    if (result.Errors.Any())
    {
        Interlocked.Increment(ref files);
        Console.WriteLine(GetPath() + " - failed to compile");
        return;
    }

    if (!result.FailureMessage.IsBlank())
    {
        Interlocked.Increment(ref files);
        Console.WriteLine(GetPath() + " - " + result.FailureMessage);
        return;
    }

    if (validate)
    {
        // Compare the original and formatted code's syntax trees to make
        // sure formatting did not change the code's meaning.
        var syntaxNodeComparer = new SyntaxNodeComparer(
            code,
            result.Code,
            cancellationToken
        );

        try
        {
            var failure = await syntaxNodeComparer.CompareSourceAsync(
                cancellationToken
            );
            if (!string.IsNullOrEmpty(failure))
            {
                Interlocked.Increment(ref sourceLost);
                Console.WriteLine(
                    GetPath() + " - failed syntax tree validation"
                );
                Console.WriteLine(failure);
            }
        }
        catch (Exception ex)
        {
            Interlocked.Increment(ref exceptionsValidatingSource);
            Console.WriteLine(
                GetPath()
                + " - failed with exception during syntax tree validation"
                + Environment.NewLine
                + ex.Message
                + 
                ex.StackTrace
            );
        }
    }

    if (check)
    {
        if (result.Code != code)
        {
            Console.WriteLine(GetPath() + " - was not formatted");
            Interlocked.Increment(ref unformattedFiles);
        }
    }

    cancellationToken.ThrowIfCancellationRequested();
    Interlocked.Increment(ref files);

    if (!check)
    {
        // purposely avoid async here, that way the file completely writes if the process gets cancelled while running.
        File.WriteAllText(file, result.Code, encoding);
    }
}
/// <summary>
/// Parses a MicroDVD ({start}{end}text) subtitle file into subtitle items.
/// </summary>
/// <param name="path">Path to the subtitle file on disk.</param>
/// <param name="result">The parsed, de-duplicated items, or null on failure.</param>
/// <returns>true when at least one item was parsed; otherwise false.</returns>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages resolvable by name.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    // BUG FIX: neither StreamReader was disposed, leaking the underlying
    // file handle; `using` closes them when the method returns.
    using var fileReader = new StreamReader(path, encoding);
    var subStream = fileReader.BaseStream;
    if (!subStream.CanRead || !subStream.CanSeek)
    {
        result = null;
        return false;
    }

    subStream.Position = 0;
    using var reader = new StreamReader(subStream, encoding, true);

    var items = new List<SubtitleItem>();

    // Skip any prologue until the first MicroDVD-formatted line.
    var line = reader.ReadLine();
    while (line != null && !IsMicroDvdLine(line))
    {
        line = reader.ReadLine();
    }

    if (line != null)
    {
        float frameRate;

        // The first matching line may declare the frame rate instead of
        // being a real cue.
        var firstItem = ParseLine(line, DefaultFrameRate);
        if (firstItem.Text != null && firstItem.Text.Any())
        {
            var success = TryExtractFrameRate(firstItem.Text, out frameRate);
            if (!success)
            {
                // Not a frame-rate header: keep it as a regular cue.
                frameRate = DefaultFrameRate;
                items.Add(firstItem);
            }
        }
        else
        {
            frameRate = DefaultFrameRate;
        }

        // Parse the remaining lines with the resolved frame rate.
        line = reader.ReadLine();
        while (line != null)
        {
            if (!string.IsNullOrEmpty(line))
            {
                var item = ParseLine(line, frameRate);
                items.Add(item);
            }
            line = reader.ReadLine();
        }
    }

    if (items.Any())
    {
        result = Filters.RemoveDuplicateItems(items);
        return true;
    }

    result = null;
    return false;
}
/// <summary>
/// Parses a SubStation Alpha (SSA/ASS) subtitle file into subtitle items.
/// </summary>
/// <param name="path">Path to the subtitle file on disk.</param>
/// <param name="result">The parsed, de-duplicated items, or null on failure.</param>
/// <returns>true when at least one item was parsed; otherwise false.</returns>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages resolvable by name.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    // BUG FIX: neither StreamReader was disposed, leaking the underlying
    // file handle; `using` closes them when the method returns.
    using var fileReader = new StreamReader(path, encoding);
    var ssaStream = fileReader.BaseStream;
    if (!ssaStream.CanRead || !ssaStream.CanSeek)
    {
        result = null;
        return false;
    }

    ssaStream.Position = 0;
    using var reader = new StreamReader(ssaStream, encoding, true);

    // Fast-forward to the events section marker (removed an unused
    // line-number counter from the original).
    var line = reader.ReadLine();
    while (line != null && line != EventLine)
    {
        line = reader.ReadLine();
    }

    if (line != null)
    {
        // The line after the marker names the event columns.
        var headerLine = reader.ReadLine();
        if (!string.IsNullOrEmpty(headerLine))
        {
            var columnHeaders = headerLine.Split(Separator).Select(head => head.Trim()).ToList();
            var startIndexColumn = columnHeaders.IndexOf(StartColumn);
            var endIndexColumn = columnHeaders.IndexOf(EndColumn);
            var textIndexColumn = columnHeaders.IndexOf(TextColumn);

            if (startIndexColumn > 0 && endIndexColumn > 0 && textIndexColumn > 0)
            {
                var items = new List<SubtitleItem>();

                line = reader.ReadLine();
                while (line != null)
                {
                    if (!string.IsNullOrEmpty(line))
                    {
                        var columns = line.Split(Separator);
                        var startText = columns[startIndexColumn];
                        var endText = columns[endIndexColumn];
                        // The text field may itself contain the separator, so
                        // rejoin everything from its column index onward.
                        var textLine = string.Join(",", columns.Skip(textIndexColumn));
                        var start = ParseSsaTimecode(startText);
                        var end = ParseSsaTimecode(endText);
                        if (start > 0 && end > 0 && !string.IsNullOrEmpty(textLine))
                        {
                            var item = new SubtitleItem
                            {
                                StartTime = start,
                                EndTime = end,
                                Text = ConvertString(textLine)
                            };
                            items.Add(item);
                        }
                    }
                    line = reader.ReadLine();
                }

                if (items.Any())
                {
                    result = Filters.RemoveDuplicateItems(items);
                    return true;
                }

                result = null;
                return false;
            }

            result = null;
            return false;
        }

        result = null;
        return false;
    }

    result = null;
    return false;
}
/// <summary>
/// Parses a SAMI (.smi) subtitle file into a list of subtitle items.
/// Returns false when the file does not literally start with &lt;SAMI&gt;
/// or no &lt;BODY&gt; section is found.
/// </summary>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages resolvable by name, then decode the file with
    // its detected charset.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    var items = new List<SubtitleItem>();
    var sr = new StreamReader(path, encoding);

    // The first line must be exactly "<SAMI>".
    var line = sr.ReadLine();
    if (line == null || !line.Equals("<SAMI>"))
    {
        sr.Close();
        result = null;
        return (false);
    }

    // Fast-forward to the "<BODY>" tag.
    while ((line = sr.ReadLine()) != null)
    {
        if (line.Equals("<BODY>"))
        {
            break;
        }
    }

    if (string.IsNullOrEmpty(line))
    {
        sr.Close();
        result = null;
        return (false);
    }

    var check = false;                 // true when `line` already holds the next tag line
    var miClassString = new string[2]; // [0] = tag attribute text, [1] = scratch for the rest of the line
    var sb = new StringBuilder();      // accumulates a cue's text lines
    var sbComment = false;             // inside a multi-line <!-- --> comment

    while (string.IsNullOrEmpty(line) != true)
    {
        if (check == false)
        {
            // Advance to the next non-empty line.
            line = sr.ReadLine();
            while (true)
            {
                if (string.IsNullOrEmpty(line))
                {
                    line = sr.ReadLine();
                }
                else
                {
                    break;
                }
            }
        }
        else
        {
            check = false;
        }

        // Skip single-line comments entirely; toggle state for multi-line ones.
        if (line.Contains("<--") && line.Contains("-->"))
        {
            continue;
        }
        if (line.Contains("<!--") && line.Contains("-->"))
        {
            continue;
        }
        if (line.Contains("<!--"))
        {
            sbComment = true;
        }
        if (line.Contains("-->"))
        {
            sbComment = false;
        }
        if (sbComment)
        {
            continue;
        }

        // Closing tags end the parse.
        if (line.Contains("</BODY>"))
        {
            break;
        }
        if (line.Contains("</SAMI>"))
        {
            break;
        }

        if (line[0].Equals('<'))
        {
            // Extract the text between '<' and '>' (the sync tag's attributes).
            var length = line.IndexOf('>');
            miClassString[0] = line.Substring(1, length - 1);
            miClassString[1] = line.Substring(length + 2);
            var splitIndex = miClassString[1].IndexOf('>');
            miClassString[1] = miClassString[1].Remove(splitIndex);
            // Split "...=<value>" — the numeric part becomes the cue start
            // (milliseconds per SAMI's Start attribute — confirm with samples).
            var miSync = miClassString[0].Split('=');

            // Collect text lines until the next <SYNC ...> tag appears.
            while ((line = sr.ReadLine())?.ToUpper().Contains("<SYNC", StringComparison.OrdinalIgnoreCase) == false)
            {
                sb.Append(line);
            }

            items.Add(new SubtitleItem(int.Parse(miSync[1]), ConvertString(sb.ToString())));
            sb = new StringBuilder();
            check = true; // `line` now holds the next SYNC tag
        }
    }

    sr.Close();

    // Each cue ends where the next begins; the final cue gets one second.
    for (var i = 0; i < items.Count; i++)
    {
        var endTime = i == items.Count - 1 ? 
            items[i].StartTime + 1000 : items[i + 1].StartTime;
        items[i].EndTime = endTime;
    }

    result = Filters.RemoveDuplicateItems(items);
    return (true);
}
/// <summary>
/// Parses a WebVTT subtitle file into a list of subtitle items.
/// </summary>
/// <param name="path">Path to the subtitle file on disk.</param>
/// <param name="result">The parsed, de-duplicated items, or null on failure.</param>
/// <returns>true when the file contained cue parts; otherwise false.</returns>
public bool ParseFormat(string path, out List<SubtitleItem> result)
{
    // Make legacy code pages resolvable by name.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    var detect = CharsetDetector.DetectFromFile(path);
    var encoding = Encoding.GetEncoding(detect.Detected.EncodingName);

    // BUG FIX: neither StreamReader was disposed, leaking the underlying
    // file handle; `using` closes them when the method returns.
    using var fileReader = new StreamReader(path, encoding);
    var vttStream = fileReader.BaseStream;
    if (!vttStream.CanRead || !vttStream.CanSeek)
    {
        result = null;
        return false;
    }

    vttStream.Position = 0;
    using var reader = new StreamReader(vttStream, encoding, true);

    var items = new List<SubtitleItem>();
    var vttSubParts = GetVttSubTitleParts(reader).ToList();
    if (vttSubParts.Any())
    {
        foreach (var vttSubPart in vttSubParts)
        {
            // Split the cue block into trimmed, non-empty lines.
            var lines = vttSubPart
                .Split(new[] { Environment.NewLine }, StringSplitOptions.None)
                .Select(s => s.Trim())
                .Where(l => !string.IsNullOrEmpty(l))
                .ToList();

            var item = new SubtitleItem();
            foreach (var line in lines)
            {
                if (item.StartTime == 0 && item.EndTime == 0)
                {
                    // Still looking for the cue's timecode line.
                    int startTc;
                    int endTc;
                    var success = TryParseTimecodeLine(line, out startTc, out endTc);
                    if (success)
                    {
                        item.StartTime = startTc;
                        item.EndTime = endTc;
                    }
                }
                else
                {
                    // Lines after the timecode are cue text (each assignment
                    // overwrites the previous one, matching original behavior).
                    item.Text = ConvertString(line);
                }

                item.Text = string.IsNullOrEmpty(item.Text) ? "" : item.Text;
            }

            if ((item.StartTime != 0 || item.EndTime != 0) && item.Text.Any())
            {
                items.Add(item);
            }
        }

        result = Filters.RemoveDuplicateItems(items);
        return true;
    }

    result = null;
    return false;
}