Пример #1
0
        /// <summary>
        /// Downloads a subtitle document and splits its text into sentences.
        /// </summary>
        /// <param name="token">Value sent in the <c>authorization</c> request header.</param>
        /// <param name="vttUrl">Address the subtitle document is downloaded from.</param>
        /// <returns>
        /// The sentences found, each ending in '.' or '?', after they have been passed to
        /// <c>RemoveSmallTalks</c>. Trailing text that is not terminated by one of those
        /// characters is not included. When parsing fails, whatever was collected before the
        /// failure is returned.
        /// </returns>
        private static List <string> ExtractVttContent(string token, string vttUrl)
        {
            // Name used only to derive the parser's format hint. The payload used to be saved
            // under this name before parsing; keeping the same hint leaves parser selection
            // unchanged while avoiding a shared, fixed-name file on disk.
            const string formatHintName = "file.txt";

            List <string> sentences = new List <string>();

            byte[] content;
            using (var client = new WebClient())
            {
                client.Headers.Clear();
                client.Headers.Add("authorization", token);
                content = client.DownloadData(vttUrl);
            }

            var parser = new SubtitlesParser.Classes.Parsers.SubParser();

            // Parse straight from memory: MemoryStream is readable and seekable.
            using (var stream = new MemoryStream(content))
            {
                try
                {
                    var mostLikelyFormat = parser.GetMostLikelyFormat(formatHintName);
                    var items            = parser.ParseStream(stream, Encoding.UTF8, mostLikelyFormat);

                    // The very first sentence starts with a single space, later ones start empty.
                    var sentence = new StringBuilder(" ");
                    foreach (var item in items)
                    {
                        foreach (var line in item.Lines)
                        {
                            foreach (var character in line)
                            {
                                sentence.Append(character);

                                if (character == '.' || character == '?')
                                {
                                    sentences.Add(sentence.ToString());
                                    sentence.Clear();
                                }
                            }

                            // Subtitle lines are joined with a space so words do not run together.
                            sentence.Append(' ');
                        }
                    }

                    // Remove small talk from the transcript sentences.
                    RemoveSmallTalks(sentences);
                }
                catch (Exception ex)
                {
                    // Best effort: keep returning what was collected, but leave a trace of the failure.
                    System.Diagnostics.Trace.TraceWarning("Failed to parse subtitles from {0}: {1}", vttUrl, ex);
                }
            }

            return(sentences);
        }
Пример #2
0
        /// <summary>
        /// Parses the subtitle file at <paramref name="FilePath"/> as UTF-8 text.
        /// </summary>
        /// <param name="FilePath">Path of the subtitle file to open for reading.</param>
        /// <returns>The items produced by the parser for that file.</returns>
        private static List <SubtitleItem> GetSubtitleText(string FilePath)
        {
            var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

            // The stream is disposed as soon as the parser has returned its result.
            using (var input = File.OpenRead(FilePath))
            {
                return subParser.ParseStream(input, Encoding.UTF8);
            }
        }
        /// <summary>
        /// Scans every file of a folder as subtitles and collects the sequences in which a
        /// line contains the first vocabulary word (case-insensitive, punctuation ignored).
        /// </summary>
        /// <param name="vocab">Vocabulary to look for; only the first entry is searched.</param>
        /// <param name="SubtilesFolder">Folder whose files are parsed as subtitles.</param>
        /// <returns>
        /// One <see cref="Sample"/> per matching line, carrying the start and end time of the
        /// sequence the line belongs to and the full path of the file. Empty when the folder
        /// has no files or <paramref name="vocab"/> is null or empty.
        /// </returns>
        public static IEnumerable <Sample> SearchVocabularyInSubtitles(string[] vocab, string SubtilesFolder)
        {
            var samplesWithVocab = new List <Sample>();

            String[] files = System.IO.Directory.GetFiles(SubtilesFolder);
            if (files.Length == 0)
            {
                // Nothing to scan; also avoids indexing into an empty array below.
                return(samplesWithVocab);
            }
            Console.WriteLine($"Files : {files[0]}");

            if (vocab == null || vocab.Length == 0)
            {
                return(samplesWithVocab);
            }

            var searchedWord = vocab[0];

            // List<T> is not safe for concurrent writers; the parallel bodies add under this lock.
            var samplesGate = new object();

            Parallel.ForEach(files, (currentFile) => {
                String fileName = System.IO.Path.GetFullPath(currentFile);
                var parser      = new SubtitlesParser.Classes.Parsers.SubParser();

                using (var fileStream = File.OpenRead(fileName))
                {
                    try
                    {
                        var mostLikelyFormat = parser.GetMostLikelyFormat(fileName);
                        var items            = parser.ParseStream(fileStream, Encoding.UTF8, mostLikelyFormat);
                        foreach (var sequence in items)
                        {
                            foreach (var sentence in sequence.Lines)
                            {
                                // Strip punctuation so "word," and "word" compare equal.
                                var sentenceStripped = Regex.Replace(sentence, @"[^\w\s]", "");

                                // Look for the vocabulary word among the words of the line, ignoring case.
                                if (!sentenceStripped.Split(" ").Contains(searchedWord, StringComparer.OrdinalIgnoreCase))
                                {
                                    continue;
                                }

                                // The parser reports times in milliseconds, so they must not be
                                // passed to the TimeSpan(long) constructor, which expects ticks.
                                var sample = new Sample
                                {
                                    StartTime   = TimeSpan.FromMilliseconds(sequence.StartTime),
                                    EndTime     = TimeSpan.FromMilliseconds(sequence.EndTime),
                                    SubFileName = fileName
                                };

                                lock (samplesGate)
                                {
                                    samplesWithVocab.Add(sample);
                                }

                                // Echo the whole sequence, each line followed by a space.
                                Console.WriteLine(string.Join(" ", sequence.Lines) + " ");
                            }
                        }
                    }catch (Exception ex) {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", fileName, ex);
                    }
                }
            });

            return(samplesWithVocab);
        }
Пример #4
0
        /// <summary>
        /// Reads the captions stored in a local subtitle file, decoding it as ISO-8859-1.
        /// </summary>
        /// <param name="filePath">Path of the subtitle file; may be null or empty.</param>
        /// <returns>
        /// The parsed items, or an empty list when <paramref name="filePath"/> is null or empty.
        /// </returns>
        public IEnumerable <SubtitleItem> LoadCaptions(string filePath)
        {
            if (!string.IsNullOrEmpty(filePath))
            {
                var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

                using (var input = File.OpenRead(filePath))
                {
                    var latin1 = Encoding.GetEncoding("iso-8859-1");
                    return subParser.ParseStream(input, latin1);
                }
            }

            // No path given: nothing to load.
            return new List <SubtitleItem>();
        }
Пример #5
0
        /// <summary>
        /// Click handler: lets the user pick a subtitle file, parses it into the
        /// <c>items</c> member and lists every word with its number of occurrences,
        /// most frequent first, in <c>listView1</c>.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter           = "All files (*.*)|*.*";
                openFileDialog.RestoreDirectory = true;

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    // Cancelled: do not touch the previously loaded items or the list.
                    return;
                }

                var parser = new SubtitlesParser.Classes.Parsers.SubParser();
                using (var fileStream = openFileDialog.OpenFile())
                {
                    items = parser.ParseStream(fileStream);
                }
            }

            // All subtitle lines as one space-separated text.
            string text = string.Join(" ", items.SelectMany(item => item.Lines));

            // Keep only letters, spaces and apostrophes, then count identical words.
            // Invariant lower-casing keeps the ASCII filter reliable in every UI culture.
            var words = Regex.Split(Regex.Replace(text.ToLowerInvariant(), @"[^a-zA-Z ']+", ""), " ")
                        .Where(x => !string.IsNullOrEmpty(x))
                        .GroupBy(g => g)
                        .Select(s => new { Word = s.Key, Count = s.Count() });

            var ranked = words.OrderByDescending(x => x.Count).ToList();
            foreach (var count in ranked)
            {
                Console.WriteLine(count);

                // Each list entry shows the word followed by its number of occurrences.
                ListViewItem lvi = new ListViewItem();
                lvi.Text = count.Word + " " + count.Count;
                listView1.Items.Add(lvi);
            }
            Console.WriteLine("count = " + ranked.Count);
        }
Пример #6
0
        /// <summary>
        /// Console entry point: parses every file returned by <c>BrowseTestSubtitlesFiles</c>
        /// as UTF-8 and reports the files that fail to parse or yield no items.
        /// Waits for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var parser = new SubtitlesParser.Classes.Parsers.SubParser();

            var allFiles = BrowseTestSubtitlesFiles();

            foreach (var file in allFiles)
            {
                var fileName = Path.GetFileName(file);
                using (var fileStream = File.OpenRead(file))
                {
                    try
                    {
                        SubtitlesFormat format = parser.GetFormat(fileName);

                        List <SubtitleItem> items = parser.ParseStream(fileStream, Encoding.UTF8, format);

                        // An empty result is reported through the same failure path as a parse error.
                        if (!items.Any())
                        {
                            throw new ArgumentException("Not items found!");
                        }
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", fileName, ex);
                    }
                }
                Console.WriteLine("----------------------");
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Пример #7
0
        /// <summary>
        /// Parses a local subtitle file as UTF-8 and rewrites it as a new SRT file, named
        /// with a fresh GUID, in the same directory.
        /// </summary>
        /// <param name="filePath">Path of the subtitle file to convert; may be null or empty.</param>
        /// <returns>
        /// Path of the generated <c>.srt</c> file, or an empty string when
        /// <paramref name="filePath"/> is null or empty or when any step throws
        /// (the exception is passed to <c>Logger.Error</c>).
        /// </returns>
        public string LoadCaptions(string filePath)
        {
            // SRT timestamps use a comma before the milliseconds.
            const string srtTimestampFormat = "hh\\:mm\\:ss\\,fff";

            if (string.IsNullOrEmpty(filePath))
            {
                return(string.Empty);
            }
            try
            {
                var parser = new SubtitlesParser.Classes.Parsers.SubParser();
                using (var fileStream = File.OpenRead(filePath))
                {
                    var lines = parser.ParseStream(fileStream, Encoding.UTF8);

                    // Path.Combine picks the platform's separator and, for a bare file name
                    // (no directory part), keeps the output relative instead of rooting it.
                    var directory = Path.GetDirectoryName(filePath) ?? string.Empty;
                    var file      = Path.Combine(directory, $"{Guid.NewGuid()}.srt");

                    using (var srtFile = new StreamWriter(file, false, Encoding.UTF8))
                    {
                        var count = 1;
                        foreach (var line in lines)
                        {
                            // Entries without a positive start and end time are left out,
                            // and do not consume a sequence number.
                            if (line.StartTime <= 0 || line.EndTime <= 0)
                            {
                                continue;
                            }

                            var start = TimeSpan.FromMilliseconds(line.StartTime).ToString(srtTimestampFormat);
                            var end   = TimeSpan.FromMilliseconds(line.EndTime).ToString(srtTimestampFormat);

                            srtFile.WriteLine(count);
                            srtFile.WriteLine($"{start} --> {end}");
                            foreach (var item in line.Lines)
                            {
                                srtFile.WriteLine(item);
                            }

                            // A blank line terminates each SRT entry.
                            srtFile.WriteLine();
                            count++;
                        }
                    }

                    return(file);
                }
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
                return(string.Empty);
            }
        }
Пример #8
0
        /// <summary>
        /// Console entry point: parses every file returned by <c>BrowseTestSubtitlesFiles</c>
        /// as UTF-8, using the file name to pick the most likely format, and prints a
        /// success line (item count and percentage of items with a non-positive time) or
        /// a failure line per file. Waits for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

            foreach (var path in BrowseTestSubtitlesFiles())
            {
                var shortName = Path.GetFileName(path);
                using (var input = File.OpenRead(path))
                {
                    try
                    {
                        var formatHint = subParser.GetMostLikelyFormat(shortName);
                        var parsed     = subParser.ParseStream(input, Encoding.UTF8, formatHint);

                        // An empty result goes through the same failure report as a parse error.
                        if (!parsed.Any())
                        {
                            throw new ArgumentException("Not items found!");
                        }

                        int total     = parsed.Count;
                        int corrupted = parsed.Count(it => it.StartTime <= 0 || it.EndTime <= 0);

                        // Integer division: the percentage is truncated.
                        Console.WriteLine("Parsing of file {0}: SUCCESS ({1} items - {2}% corrupted)",
                                          shortName, total, (corrupted * 100) / total);
                        Console.WriteLine("----------------");
                    }
                    catch (Exception failure)
                    {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", shortName, failure);
                    }
                }
                Console.WriteLine("----------------------");
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Пример #9
0
        /// <summary>
        /// Console entry point: parses every file returned by <c>BrowseTestSubtitlesFiles</c>
        /// with the parser's defaults and prints a success line with the item count, or a
        /// failure line, per file. Waits for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

            foreach (var path in BrowseTestSubtitlesFiles())
            {
                var shortName = Path.GetFileName(path);
                using (var input = File.OpenRead(path))
                {
                    try
                    {
                        var parsed = subParser.ParseStream(input);

                        // An empty result goes through the same failure report as a parse error.
                        if (!parsed.Any())
                        {
                            throw new ArgumentException("Not items found!");
                        }

                        Console.WriteLine("Parsing of file {0}: SUCCESS ({1} items)", shortName, parsed.Count);
                    }
                    catch (Exception failure)
                    {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", shortName, failure);
                    }
                }
                Console.WriteLine("----------------------");
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Пример #10
0
        /// <summary>
        /// Parses, validates and translates each file returned by <c>GetFiles(path)</c>,
        /// writing every result through <c>WriteToFile</c>, and prints progress and
        /// character-usage totals to the console.
        /// </summary>
        /// <param name="path">Passed to <c>GetFiles</c> to obtain the files to process.</param>
        /// <param name="outputFolder">Passed to <c>CreateAndCheckOutputFolder</c> and <c>WriteToFile</c>.</param>
        /// <param name="fromLang">Source language, forwarded to the translation service.</param>
        /// <param name="toLang">Target language, forwarded to the translation service.</param>
        /// <param name="api">Passed to <c>SetService</c> before any file is processed.</param>
        /// <param name="apiKey">Forwarded to the translation service.</param>
        /// <param name="askForRetry">Forwarded to the translation service.</param>
        /// <param name="maxCharactersToSend">
        /// Starting value of the remaining-character count that is forwarded to the
        /// translation service and reduced after each translated file.
        /// </param>
        /// <param name="peek">Forwarded to the translation service.</param>
        /// <exception cref="Exception">
        /// A parsed file has no items, an item has a non-positive start or end time or no
        /// lines, or a line is null or whitespace.
        /// </exception>
        public void Run(string path, string outputFolder, string fromLang, string toLang, ApiType api, string apiKey, bool askForRetry = false, long maxCharactersToSend = 0, bool peek = false)
        {
            Console.WriteLine($"Version: {Assembly.GetExecutingAssembly().GetName().Version}");
            ShowNotice();
            CreateAndCheckOutputFolder(outputFolder);
            SetService(api);

            Console.WriteLine();

            var  inputFiles     = GetFiles(path).ToList();
            int  charactersSent = 0;
            int  filesStarted   = 0;
            long budgetLeft     = maxCharactersToSend;

            foreach (var file in inputFiles)
            {
                var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

                List <SubtitleItem>   parsedItems;
                List <MySubtitleItem> converted;

                using (var input = File.OpenRead(file))
                {
                    try
                    {
                        parsedItems = subParser.ParseStream(input);
                    }
                    catch (FormatException parseError)
                    {
                        // Files the parser rejects are skipped rather than aborting the run.
                        Console.WriteLine($"WARNING! Invalid subtitle file [{file}] Skipping file. Error message: {parseError.Message}");
                        continue;
                    }

                    converted = new List <MySubtitleItem>(parsedItems.Count);
                    foreach (var parsed in parsedItems)
                    {
                        converted.Add(parsed.CopyToMySubtitleItem());
                    }
                }

                if (converted.Count == 0)
                {
                    throw new Exception($"File {file} has no subtitles...");
                }

                // Every item needs positive times and at least one non-blank line.
                foreach (var entry in converted)
                {
                    if (entry.EndTime <= 0 || entry.StartTime <= 0 || entry.Lines.Count <= 0)
                    {
                        throw new Exception($"Invalid subtitle file: {file}");
                    }

                    foreach (var text in entry.Lines)
                    {
                        if (string.IsNullOrWhiteSpace(text))
                        {
                            throw new Exception($"Empty line detected in {file}");
                        }
                    }
                }

                filesStarted++;
                Console.WriteLine($"[START] File {file} has {converted.Count} subtitle items");
                Console.WriteLine($"Processing file {filesStarted} out of {inputFiles.Count}");

                var outcome = TranslationService.Translate(fromLang, toLang, converted, apiKey, askForRetry, budgetLeft, peek);
                if (outcome.SkippedBecauseOfCharacterLimit)
                {
                    // The service reported the limit as reached: stop here, the totals below are still printed.
                    Console.WriteLine("Character limit reached. Skipping current and remaining files...");
                    break;
                }

                ValidateItems(parsedItems, outcome.TranslatedItems.ToImmutableList());
                WriteToFile(outcome.TranslatedItems.ToImmutableList(), file, outputFolder);

                charactersSent += outcome.TranslatedCharacters;
                budgetLeft     -= outcome.TranslatedCharacters;

                Console.WriteLine();
                Console.WriteLine($"Remaining characters to send: {budgetLeft}");
                Console.WriteLine($"Characters translated (of file): {outcome.TranslatedCharacters}");
                Console.WriteLine($"Current total characters sent: {charactersSent}");
                Console.WriteLine($"[FINISH] Finished file: {file}");
                Console.WriteLine("-------------------------------------------------");
                Console.WriteLine();
            }

            Console.WriteLine();
            Console.WriteLine("Completed all files.");
            Console.WriteLine($"Remaining characters (unused): {budgetLeft}");
            Console.WriteLine($"Total characters used: {charactersSent}");
            Console.WriteLine($"Char limit: {maxCharactersToSend}");
            Console.WriteLine($"Char limit - total characters used = {maxCharactersToSend - charactersSent}");
        }