/// <summary>
/// Downloads a subtitle document and splits its caption text into sentences.
/// </summary>
/// <param name="token">Value sent in the <c>authorization</c> request header.</param>
/// <param name="vttUrl">Address the subtitle document is downloaded from.</param>
/// <returns>
/// The sentences found, each ending in '.' or '?', after <c>RemoveSmallTalks</c> has been
/// applied to the list. Text after the last terminator is not returned. When parsing
/// fails the error is written to standard error and whatever was collected so far is returned.
/// </returns>
private static List<string> ExtractVttContent(string token, string vttUrl)
{
    var sentences = new List<string>();

    byte[] content;
    using (var client = new WebClient())
    {
        client.Headers.Clear();
        client.Headers.Add("authorization", token);
        content = client.DownloadData(vttUrl);
    }

    // The payload is still persisted as "file.txt" next to the executable, exactly as before;
    // File.WriteAllBytes closes the handle even when the write fails.
    string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "file.txt");
    File.WriteAllBytes(path, content);

    var parser = new SubtitlesParser.Classes.Parsers.SubParser();
    using (var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        try
        {
            var mostLikelyFormat = parser.GetMostLikelyFormat(fileStream.Name);
            var items = parser.ParseStream(fileStream, Encoding.UTF8, mostLikelyFormat);

            // The very first sentence starts with a single space (kept from the original output).
            var sentence = new StringBuilder(" ");
            foreach (var item in items)
            {
                foreach (var line in item.Lines)
                {
                    foreach (char ch in line)
                    {
                        sentence.Append(ch);
                        if (ch == '.' || ch == '?')
                        {
                            sentences.Add(sentence.ToString());
                            sentence.Clear();
                        }
                    }

                    // Caption lines are joined with a space so words do not run together.
                    sentence.Append(' ');
                }
            }

            // Remove small talk from the transcript sentences.
            RemoveSmallTalks(sentences);
        }
        catch (Exception ex)
        {
            // Still best-effort (the caller gets what was collected), but no longer silent.
            Console.Error.WriteLine("Extracting sentences from {0}: FAILURE\n{1}", vttUrl, ex);
        }
    }

    return sentences;
}
/// <summary>
/// Parses the subtitle file at <paramref name="FilePath"/>, reading it as UTF-8.
/// </summary>
/// <param name="FilePath">Path of the subtitle file to open for reading.</param>
/// <returns>The items produced by the parser for that file.</returns>
private static List<SubtitleItem> GetSubtitleText(string FilePath)
{
    var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

    // The stream is only needed while parsing; it is closed as soon as the list is built.
    using (var input = File.OpenRead(FilePath))
    {
        return subParser.ParseStream(input, Encoding.UTF8);
    }
}
/// <summary>
/// Scans every subtitle file in a folder, in parallel, for caption lines that contain
/// the first word of <paramref name="vocab"/>.
/// </summary>
/// <param name="vocab">Vocabulary to look for. Only the first entry is searched.</param>
/// <param name="SubtilesFolder">Folder whose files are parsed as subtitles.</param>
/// <returns>
/// One <c>Sample</c> per matching caption line (a sequence with two matching lines yields
/// two samples). The order is not deterministic because files are processed in parallel.
/// Empty when the folder has no files or no vocabulary is supplied.
/// </returns>
public static IEnumerable<Sample> SearchVocabularyInSubtitles(string[] vocab, string SubtilesFolder)
{
    var samplesWithVocab = new List<Sample>();
    string[] files = System.IO.Directory.GetFiles(SubtilesFolder);

    // An empty folder used to crash on files[0].
    if (files.Length == 0)
    {
        return samplesWithVocab;
    }

    Console.WriteLine($"Files : {files[0]}");

    if (vocab == null || vocab.Length == 0)
    {
        return samplesWithVocab;
    }

    string searchedWord = vocab[0];

    // List<T> is not safe for concurrent writers; every Add goes through this gate.
    var gate = new object();

    Parallel.ForEach(files, currentFile =>
    {
        string fileName = System.IO.Path.GetFullPath(currentFile);
        try
        {
            var parser = new SubtitlesParser.Classes.Parsers.SubParser();

            // Opened inside the try so one unreadable file is reported instead of aborting the scan.
            using (var fileStream = File.OpenRead(fileName))
            {
                var mostLikelyFormat = parser.GetMostLikelyFormat(fileName);
                var items = parser.ParseStream(fileStream, Encoding.UTF8, mostLikelyFormat);

                foreach (var sequence in items)
                {
                    foreach (var sentence in sequence.Lines)
                    {
                        // Strip punctuation.
                        var sentenceStripped = Regex.Replace(sentence, @"[^\w\s]", "");

                        // Look for the vocabulary word in the line, ignoring case. The original
                        // called ToLower() and discarded the result, so it was case-sensitive.
                        if (!sentenceStripped.Split(' ').Contains(searchedWord, StringComparer.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        // Subtitle times are milliseconds; new TimeSpan(long) would read them as ticks.
                        var sample = new Sample
                        {
                            StartTime = TimeSpan.FromMilliseconds(sequence.StartTime),
                            EndTime = TimeSpan.FromMilliseconds(sequence.EndTime),
                            SubFileName = fileName
                        };

                        lock (gate)
                        {
                            samplesWithVocab.Add(sample);
                        }

                        Console.WriteLine(string.Concat(sequence.Lines.Select(line => line + " ")));
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", fileName, ex);
        }
    });

    return samplesWithVocab;
}
/// <summary>
/// Reads the captions of a local subtitle file, decoding it as ISO-8859-1.
/// </summary>
/// <param name="filePath">Path of the subtitle file; may be null or empty.</param>
/// <returns>The parsed subtitle items, or an empty list when no path is given.</returns>
public IEnumerable<SubtitleItem> LoadCaptions(string filePath)
{
    // Nothing to load without a path.
    if (string.IsNullOrEmpty(filePath))
    {
        return new List<SubtitleItem>();
    }

    var subParser = new SubtitlesParser.Classes.Parsers.SubParser();
    using (var input = File.OpenRead(filePath))
    {
        var captions = subParser.ParseStream(input, Encoding.GetEncoding("iso-8859-1"));
        return captions;
    }
}
/// <summary>
/// Lets the user pick a subtitle file, parses it into the <c>items</c> member and appends one
/// row per distinct word ("word count", most frequent first) to <c>listView1</c>.
/// Does nothing when the dialog is cancelled.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    using (OpenFileDialog openFileDialog = new OpenFileDialog())
    {
        openFileDialog.Filter = "All files (*.*)|*.*";
        openFileDialog.RestoreDirectory = true;

        // Previously a cancelled dialog fell through and processed whatever `items` held
        // (a NullReferenceException, or duplicate rows from the previously loaded file).
        if (openFileDialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        var parser = new SubtitlesParser.Classes.Parsers.SubParser();
        using (var fileStream = openFileDialog.OpenFile())
        {
            items = parser.ParseStream(fileStream);
        }
    }

    // All caption lines as one space-separated text.
    string text = string.Join(" ", items.SelectMany(item => item.Lines));

    // Lower-case invariantly so the ASCII-only filter below does not drop letters under
    // cultures with special casing rules; then keep only letters, spaces and apostrophes.
    var words = Regex.Split(Regex.Replace(text.ToLowerInvariant(), @"[^a-zA-Z ']+", ""), " ")
        .Where(x => !string.IsNullOrEmpty(x))
        .GroupBy(g => g)
        .Select(s => new { Word = s.Key, Count = s.Count() })
        .OrderByDescending(x => x.Count);

    int countwords = 0;
    foreach (var entry in words)
    {
        countwords++;
        Console.WriteLine(entry);

        // Set the row text: the word followed by its number of occurrences.
        ListViewItem lvi = new ListViewItem();
        lvi.Text = entry.Word + " " + entry.Count;
        listView1.Items.Add(lvi);
    }

    Console.WriteLine("count = " + countwords);
}
/// <summary>
/// Parses every test subtitle file as UTF-8 using the format returned by
/// <c>parser.GetFormat</c> for its file name, and reports the files that fail.
/// Successful files produce no message of their own; a separator line is printed after each file.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var parser = new SubtitlesParser.Classes.Parsers.SubParser();
    var allFiles = BrowseTestSubtitlesFiles();

    foreach (var file in allFiles)
    {
        var fileName = Path.GetFileName(file);
        try
        {
            // Opened inside the try so an unreadable file is reported like any other failure
            // instead of terminating the whole run.
            using (var fileStream = File.OpenRead(file))
            {
                SubtitlesFormat format = parser.GetFormat(fileName);
                List<SubtitleItem> items = parser.ParseStream(fileStream, Encoding.UTF8, format);

                // A file that parses to nothing counts as a failure.
                if (!items.Any())
                {
                    throw new ArgumentException("Not items found!");
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", fileName, ex);
        }

        Console.WriteLine("----------------------");
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Parses a local subtitle file (read as UTF-8) and rewrites it as a new SRT file
/// with a GUID name in the same directory.
/// </summary>
/// <param name="filePath">Path of the subtitle file to convert; may be null or empty.</param>
/// <returns>
/// The path of the SRT file that was written, or an empty string when no path was given
/// or when any step failed (the exception is passed to <c>Logger.Error</c>).
/// </returns>
public string LoadCaptions(string filePath)
{
    // SRT timestamp layout: hours:minutes:seconds,milliseconds.
    const string SrtTimestampFormat = "hh\\:mm\\:ss\\,fff";

    if (string.IsNullOrEmpty(filePath))
    {
        return string.Empty;
    }

    try
    {
        var parser = new SubtitlesParser.Classes.Parsers.SubParser();
        using (var fileStream = File.OpenRead(filePath))
        {
            var lines = parser.ParseStream(fileStream, Encoding.UTF8);

            // Path.Combine instead of a hard-coded '\': a bare file name used to produce
            // "\<guid>.srt" (the drive root on Windows), and the separator was wrong elsewhere.
            var file = Path.Combine(Path.GetDirectoryName(filePath) ?? string.Empty, $"{Guid.NewGuid()}.srt");

            using (var srtFile = new StreamWriter(file, false, Encoding.UTF8))
            {
                var count = 1;
                foreach (var line in lines)
                {
                    // Items without a positive start and end time are left out, and they
                    // do not consume a sequence number.
                    if (line.StartTime <= 0 || line.EndTime <= 0)
                    {
                        continue;
                    }

                    var start = TimeSpan.FromMilliseconds(line.StartTime).ToString(SrtTimestampFormat);
                    var end = TimeSpan.FromMilliseconds(line.EndTime).ToString(SrtTimestampFormat);

                    srtFile.WriteLine(count);
                    srtFile.WriteLine($"{start} --> {end}");
                    foreach (var item in line.Lines)
                    {
                        srtFile.WriteLine(item);
                    }

                    // A blank line terminates each SRT entry.
                    srtFile.WriteLine();
                    count++;
                }
            }

            return file;
        }
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
        return string.Empty;
    }
}
/// <summary>
/// Parses every test subtitle file as UTF-8, starting from the format suggested by its
/// file name, and prints per file either the item count with the percentage of items
/// whose start or end time is not positive, or the failure.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var subParser = new SubtitlesParser.Classes.Parsers.SubParser();
    var candidates = BrowseTestSubtitlesFiles();

    foreach (var path in candidates)
    {
        var name = Path.GetFileName(path);
        using (var input = File.OpenRead(path))
        {
            try
            {
                var format = subParser.GetMostLikelyFormat(name);
                var parsed = subParser.ParseStream(input, Encoding.UTF8, format);

                // An empty result is reported through the same path as a parse error.
                if (!parsed.Any())
                {
                    throw new ArgumentException("Not items found!");
                }

                // Integer percentage of items with a non-positive start or end time.
                int corrupted = parsed.Count(it => it.StartTime <= 0 || it.EndTime <= 0);
                int corruptedPercent = (corrupted * 100) / parsed.Count;

                Console.WriteLine("Parsing of file {0}: SUCCESS ({1} items - {2}% corrupted)",
                    name, parsed.Count, corruptedPercent);
                Console.WriteLine("----------------");
            }
            catch (Exception ex)
            {
                Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", name, ex);
            }
        }

        Console.WriteLine("----------------------");
    }

    // Wait for Enter before exiting.
    Console.ReadLine();
}
/// <summary>
/// Parses every test subtitle file with the parser's default settings and prints,
/// per file, either the number of items found or the failure.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var subParser = new SubtitlesParser.Classes.Parsers.SubParser();
    var candidates = BrowseTestSubtitlesFiles();

    foreach (var path in candidates)
    {
        var name = Path.GetFileName(path);
        using (var input = File.OpenRead(path))
        {
            try
            {
                var parsed = subParser.ParseStream(input);

                // An empty result is reported through the same path as a parse error.
                if (!parsed.Any())
                {
                    throw new ArgumentException("Not items found!");
                }

                Console.WriteLine("Parsing of file {0}: SUCCESS ({1} items)", name, parsed.Count);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", name, ex);
            }
        }

        Console.WriteLine("----------------------");
    }

    // Wait for Enter before exiting.
    Console.ReadLine();
}
/// <summary>
/// Process all files: parse each subtitle file found for <paramref name="path"/>, validate it,
/// translate it through <c>TranslationService</c> and write the result to
/// <paramref name="outputFolder"/>, printing progress and character usage along the way.
/// </summary>
/// <param name="path">Passed to <c>GetFiles</c> to find the subtitle files.</param>
/// <param name="outputFolder">Folder checked by <c>CreateAndCheckOutputFolder</c> and passed to <c>WriteToFile</c>.</param>
/// <param name="fromLang">Source language, forwarded to the translation service.</param>
/// <param name="toLang">Target language, forwarded to the translation service.</param>
/// <param name="api">Selects the translation service via <c>SetService</c>.</param>
/// <param name="apiKey">API key, forwarded to the translation service.</param>
/// <param name="askForRetry">Forwarded to the translation service.</param>
/// <param name="maxCharactersToSend">
/// Starting value of the remaining-character budget; the characters translated for each
/// file are subtracted from it and the remainder is forwarded to the translation service.
/// </param>
/// <param name="peek">Forwarded to the translation service.</param>
/// <exception cref="Exception">
/// A parsed file has no items, an item without a positive start/end time or without lines,
/// or a blank line.
/// </exception>
public void Run(string path, string outputFolder, string fromLang, string toLang, ApiType api, string apiKey, bool askForRetry = false, long maxCharactersToSend = 0, bool peek = false)
{
    Console.WriteLine($"Version: {Assembly.GetExecutingAssembly().GetName().Version}");
    ShowNotice();
    CreateAndCheckOutputFolder(outputFolder);
    SetService(api);
    Console.WriteLine();

    var files = GetFiles(path).ToList();

    // long, like the budget it is compared with, so many large files cannot overflow it.
    long totalCharacters = 0;
    int currentFileCount = 0;
    long remainingCharactersToSend = maxCharactersToSend;

    foreach (var file in files)
    {
        var parser = new SubtitlesParser.Classes.Parsers.SubParser();
        List<SubtitleItem> items;
        List<MySubtitleItem> myItems;

        using (var stream = File.OpenRead(file))
        {
            try
            {
                items = parser.ParseStream(stream);
            }
            catch (FormatException ex)
            {
                Console.WriteLine($"WARNING! Invalid subtitle file [{file}] Skipping file. Error message: {ex.Message}");
                continue;
            }

            myItems = new List<MySubtitleItem>(items.Count);
            foreach (var item in items)
            {
                myItems.Add(item.CopyToMySubtitleItem());
            }
        }

        // Validation failures abort the whole run; only a FormatException above skips a file.
        foreach (var item in myItems)
        {
            if (item.EndTime <= 0 || item.StartTime <= 0 || item.Lines.Count <= 0)
            {
                throw new Exception($"Invalid subtitle file: {file}");
            }

            foreach (var line in item.Lines)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    throw new Exception($"Empty line detected in {file}");
                }
            }
        }

        if (myItems.Count == 0)
        {
            throw new Exception($"File {file} has no subtitles...");
        }

        currentFileCount++;
        Console.WriteLine($"[START] File {file} has {myItems.Count} subtitle items");
        Console.WriteLine($"Processing file {currentFileCount} out of {files.Count}");

        var translatedItems = TranslationService.Translate(fromLang, toLang, myItems, apiKey, askForRetry, remainingCharactersToSend, peek);
        if (translatedItems.SkippedBecauseOfCharacterLimit)
        {
            Console.WriteLine($"Character limit reached. Skipping current and remaining files...");
            break;
        }

        // Materialise once and hand the same immutable list to validation and output.
        var translated = translatedItems.TranslatedItems.ToImmutableList();
        ValidateItems(items, translated);
        WriteToFile(translated, file, outputFolder);

        totalCharacters += translatedItems.TranslatedCharacters;
        remainingCharactersToSend -= translatedItems.TranslatedCharacters;

        Console.WriteLine();
        Console.WriteLine($"Remaining characters to send: {remainingCharactersToSend}");
        Console.WriteLine($"Characters translated (of file): {translatedItems.TranslatedCharacters}");
        Console.WriteLine($"Current total characters sent: {totalCharacters}");
        Console.WriteLine($"[FINISH] Finished file: {file}");
        Console.WriteLine("-------------------------------------------------");
        Console.WriteLine();
    }

    Console.WriteLine();
    Console.WriteLine("Completed all files.");
    Console.WriteLine($"Remaining characters (unused): {remainingCharactersToSend}");
    Console.WriteLine($"Total characters used: {totalCharacters}");
    Console.WriteLine($"Char limit: {maxCharactersToSend}");
    Console.WriteLine($"Char limit - total characters used = {maxCharactersToSend - totalCharacters}");
}